m/test/e2e: split out tests into subpackages

The end-to-end tests have grown large enough that they merit their own
test targets. To make this more Go-idiomatic, we split the tests
not just into separate Bazel targets, but also into separate Go
packages.

We also add per-test resource requests for Bazel, including a new
resource kind (iops). This makes the tests more deterministic and
allows us to, e.g., use --runs_per_test=10 to deflake test logic
without hitting resource contention issues.
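
For illustration only (sketched from Bazel's generic resource-tags
mechanism; the exact tag spellings and amounts used by this change may
differ), a suite declares its requests via test tags, paired with a
--local_extra_resources pool on the command line, and deflaking then
becomes:

  go_test(
      name = "kubernetes_test",
      srcs = ["run_test.go"],
      tags = ["resources:iops:1000"],  # hypothetical amount
  )

  bazel test --runs_per_test=10 //metropolis/test/e2e/suites/kubernetes:kubernetes_test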

//metropolis/test/e2e/suites/core:core_test                              PASSED in 35.1s
  Stats over 10 runs: max = 35.1s, min = 26.6s, avg = 31.9s, dev = 2.6s
//metropolis/test/e2e/suites/ha:ha_test                                  PASSED in 114.6s
  Stats over 10 runs: max = 114.6s, min = 90.1s, avg = 100.9s, dev = 7.6s
//metropolis/test/e2e/suites/ha_cold:ha_cold_test                        PASSED in 67.8s
  Stats over 10 runs: max = 67.8s, min = 55.5s, avg = 62.0s, dev = 4.1s
//metropolis/test/e2e/suites/kubernetes:kubernetes_test                  PASSED in 80.9s
  Stats over 10 runs: max = 80.9s, min = 58.8s, avg = 68.6s, dev = 6.0s

Change-Id: I8f31e09f599fd90c9941e2b69f36789817fa90ce
Reviewed-on: https://review.monogon.dev/c/monogon/+/3086
Reviewed-by: Jan Schär <jan@monogon.tech>
Tested-by: Jenkins CI
diff --git a/metropolis/test/e2e/suites/kubernetes/run_test.go b/metropolis/test/e2e/suites/kubernetes/run_test.go
new file mode 100644
index 0000000..ec38aa3
--- /dev/null
+++ b/metropolis/test/e2e/suites/kubernetes/run_test.go
@@ -0,0 +1,368 @@
+package kubernetes
+
+import (
+	"context"
+	"crypto/tls"
+	"crypto/x509"
+	"errors"
+	"fmt"
+	"io"
+	"net"
+	"net/http"
+	_ "net/http/pprof"
+	"net/url"
+	"os"
+	"strings"
+	"testing"
+	"time"
+
+	"github.com/bazelbuild/rules_go/go/runfiles"
+	corev1 "k8s.io/api/core/v1"
+	kerrors "k8s.io/apimachinery/pkg/api/errors"
+	"k8s.io/apimachinery/pkg/api/resource"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	podv1 "k8s.io/kubernetes/pkg/api/v1/pod"
+
+	"source.monogon.dev/metropolis/pkg/localregistry"
+	"source.monogon.dev/metropolis/test/launch/cluster"
+	"source.monogon.dev/metropolis/test/util"
+
+	common "source.monogon.dev/metropolis/node"
+)
+
+const (
+	// Timeout for the global test context.
+	//
+	// Bazel would eventually time out the test after 900s ("large") if, for
+	// some reason, the context cancellation fails to abort it.
+	globalTestTimeout = 600 * time.Second
+
+	// Timeouts for individual end-to-end tests of different sizes.
+	smallTestTimeout = 60 * time.Second
+	largeTestTimeout = 120 * time.Second
+)
+
+// TestE2EKubernetes exercises the Kubernetes functionality of Metropolis.
+//
+// The tests are performed against an in-memory cluster.
+func TestE2EKubernetes(t *testing.T) {
+	// Set a global timeout to make sure this terminates
+	ctx, cancel := context.WithTimeout(context.Background(), globalTestTimeout)
+	defer cancel()
+
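+	// Resolve the test image manifest from Bazel runfiles and serve the
+	// images to cluster nodes from an in-process local registry.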
+	rPath, err := runfiles.Rlocation("_main/metropolis/test/e2e/testimages_manifest.prototxt")
+	if err != nil {
+		t.Fatalf("Resolving registry manifest failed: %v", err)
+	}
+	df, err := os.ReadFile(rPath)
+	if err != nil {
+		t.Fatalf("Reading registry manifest failed: %v", err)
+	}
+	lr, err := localregistry.FromBazelManifest(df)
+	if err != nil {
+		t.Fatalf("Creating test image registry failed: %v", err)
+	}
+
+	// Launch cluster.
+	clusterOptions := cluster.ClusterOptions{
+		NumNodes:      2,
+		LocalRegistry: lr,
+	}
+	cluster, err := cluster.LaunchCluster(ctx, clusterOptions)
+	if err != nil {
+		t.Fatalf("LaunchCluster failed: %v", err)
+	}
+	defer func() {
+		err := cluster.Close()
+		if err != nil {
+			t.Fatalf("cluster Close failed: %v", err)
+		}
+	}()
+
+	clientSet, err := cluster.GetKubeClientSet()
+	if err != nil {
+		t.Fatal(err)
+	}
+	util.TestEventual(t, "Add KubernetesWorker roles", ctx, smallTestTimeout, func(ctx context.Context) error {
+		// Make everything but the first node into KubernetesWorkers.
+		for i := 1; i < clusterOptions.NumNodes; i++ {
+			err := cluster.MakeKubernetesWorker(ctx, cluster.NodeIDs[i])
+			if err != nil {
+				return util.Permanent(fmt.Errorf("MakeKubernetesWorker: %w", err))
+			}
+		}
+		return nil
+	})
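+	// Wait for the worker to register with the apiserver and report Ready.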
+	util.TestEventual(t, "Node is registered and ready", ctx, largeTestTimeout, func(ctx context.Context) error {
+		nodes, err := clientSet.CoreV1().Nodes().List(ctx, metav1.ListOptions{})
+		if err != nil {
+			return err
+		}
+		if len(nodes.Items) < 1 {
+			return errors.New("node not yet registered")
+		}
+		node := nodes.Items[0]
+		var ready bool
+		for _, cond := range node.Status.Conditions {
+			if cond.Type != corev1.NodeReady {
+				continue
+			}
+			if cond.Status != corev1.ConditionTrue {
+				return fmt.Errorf("node not ready: %v", cond.Message)
+			}
+			ready = true
+		}
+		if !ready {
+			return errors.New("node does not yet report a Ready condition")
+		}
+		return nil
+	})
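+	// Deploy a trivial workload and wait for it to become available; on
+	// failure, surface recent pod events to make flakes debuggable.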
+	util.TestEventual(t, "Simple deployment", ctx, largeTestTimeout, func(ctx context.Context) error {
+		_, err := clientSet.AppsV1().Deployments("default").Create(ctx, makeTestDeploymentSpec("test-deploy-1"), metav1.CreateOptions{})
+		return err
+	})
+	util.TestEventual(t, "Simple deployment is running", ctx, largeTestTimeout, func(ctx context.Context) error {
+		res, err := clientSet.CoreV1().Pods("default").List(ctx, metav1.ListOptions{LabelSelector: "name=test-deploy-1"})
+		if err != nil {
+			return err
+		}
+		if len(res.Items) == 0 {
+			return errors.New("pod didn't get created")
+		}
+		pod := res.Items[0]
+		if podv1.IsPodAvailable(&pod, 1, metav1.NewTime(time.Now())) {
+			return nil
+		}
+		events, err := clientSet.CoreV1().Events("default").List(ctx, metav1.ListOptions{FieldSelector: fmt.Sprintf("involvedObject.name=%s,involvedObject.namespace=default", pod.Name)})
+		if err != nil || len(events.Items) == 0 {
+			return fmt.Errorf("pod is not ready: %v", pod.Status.Phase)
+		} else {
+			return fmt.Errorf("pod is not ready: %v", events.Items[0].Message)
+		}
+	})
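+	// Repeat the deployment under the gVisor runtime class to exercise the
+	// sandboxed container runtime.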
+	util.TestEventual(t, "Simple deployment with gvisor", ctx, largeTestTimeout, func(ctx context.Context) error {
+		deployment := makeTestDeploymentSpec("test-deploy-2")
+		gvisorStr := "gvisor"
+		deployment.Spec.Template.Spec.RuntimeClassName = &gvisorStr
+		_, err := clientSet.AppsV1().Deployments("default").Create(ctx, deployment, metav1.CreateOptions{})
+		return err
+	})
+	util.TestEventual(t, "Simple deployment is running on gvisor", ctx, largeTestTimeout, func(ctx context.Context) error {
+		res, err := clientSet.CoreV1().Pods("default").List(ctx, metav1.ListOptions{LabelSelector: "name=test-deploy-2"})
+		if err != nil {
+			return err
+		}
+		if len(res.Items) == 0 {
+			return errors.New("pod didn't get created")
+		}
+		pod := res.Items[0]
+		if podv1.IsPodAvailable(&pod, 1, metav1.NewTime(time.Now())) {
+			return nil
+		}
+		events, err := clientSet.CoreV1().Events("default").List(ctx, metav1.ListOptions{FieldSelector: fmt.Sprintf("involvedObject.name=%s,involvedObject.namespace=default", pod.Name)})
+		if err != nil || len(events.Items) == 0 {
+			return fmt.Errorf("pod is not ready: %v", pod.Status.Phase)
+		} else {
+			var errorMsg strings.Builder
+			for _, msg := range events.Items {
+				errorMsg.WriteString(" | ")
+				errorMsg.WriteString(msg.Message)
+			}
+			return fmt.Errorf("pod is not ready: %v", errorMsg.String())
+		}
+	})
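+	// Exercise persistent storage: a StatefulSet with a filesystem PVC,
+	// followed by one with a raw block PVC.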
+	util.TestEventual(t, "Simple StatefulSet with PVC", ctx, largeTestTimeout, func(ctx context.Context) error {
+		_, err := clientSet.AppsV1().StatefulSets("default").Create(ctx, makeTestStatefulSet("test-statefulset-1", corev1.PersistentVolumeFilesystem), metav1.CreateOptions{})
+		return err
+	})
+	util.TestEventual(t, "Simple StatefulSet with PVC is running", ctx, largeTestTimeout, func(ctx context.Context) error {
+		res, err := clientSet.CoreV1().Pods("default").List(ctx, metav1.ListOptions{LabelSelector: "name=test-statefulset-1"})
+		if err != nil {
+			return err
+		}
+		if len(res.Items) == 0 {
+			return errors.New("pod didn't get created")
+		}
+		pod := res.Items[0]
+		if podv1.IsPodAvailable(&pod, 1, metav1.NewTime(time.Now())) {
+			return nil
+		}
+		events, err := clientSet.CoreV1().Events("default").List(ctx, metav1.ListOptions{FieldSelector: fmt.Sprintf("involvedObject.name=%s,involvedObject.namespace=default", pod.Name)})
+		if err != nil || len(events.Items) == 0 {
+			return fmt.Errorf("pod is not ready: %v", pod.Status.Phase)
+		} else {
+			return fmt.Errorf("pod is not ready: %v", events.Items[0].Message)
+		}
+	})
+	util.TestEventual(t, "Simple StatefulSet with Block PVC", ctx, largeTestTimeout, func(ctx context.Context) error {
+		_, err := clientSet.AppsV1().StatefulSets("default").Create(ctx, makeTestStatefulSet("test-statefulset-2", corev1.PersistentVolumeBlock), metav1.CreateOptions{})
+		return err
+	})
+	util.TestEventual(t, "Simple StatefulSet with Block PVC is running", ctx, largeTestTimeout, func(ctx context.Context) error {
+		res, err := clientSet.CoreV1().Pods("default").List(ctx, metav1.ListOptions{LabelSelector: "name=test-statefulset-2"})
+		if err != nil {
+			return err
+		}
+		if len(res.Items) == 0 {
+			return errors.New("pod didn't get created")
+		}
+		pod := res.Items[0]
+		if podv1.IsPodAvailable(&pod, 1, metav1.NewTime(time.Now())) {
+			return nil
+		}
+		events, err := clientSet.CoreV1().Events("default").List(ctx, metav1.ListOptions{FieldSelector: fmt.Sprintf("involvedObject.name=%s,involvedObject.namespace=default", pod.Name)})
+		if err != nil || len(events.Items) == 0 {
+			return fmt.Errorf("pod is not ready: %v", pod.Status.Phase)
+		} else {
+			return fmt.Errorf("pod is not ready: %v", events.Items[0].Message)
+		}
+	})
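+	// Run the in-cluster self-test job; on failure, pull the last log line
+	// from its pod so the cause shows up in the test output.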
+	util.TestEventual(t, "In-cluster self-test job", ctx, smallTestTimeout, func(ctx context.Context) error {
+		_, err := clientSet.BatchV1().Jobs("default").Create(ctx, makeSelftestSpec("selftest"), metav1.CreateOptions{})
+		return err
+	})
+	util.TestEventual(t, "In-cluster self-test job passed", ctx, smallTestTimeout, func(ctx context.Context) error {
+		res, err := clientSet.BatchV1().Jobs("default").Get(ctx, "selftest", metav1.GetOptions{})
+		if err != nil {
+			return err
+		}
+		if res.Status.Failed > 0 {
+			pods, err := clientSet.CoreV1().Pods("default").List(ctx, metav1.ListOptions{
+				LabelSelector: "job-name=selftest",
+			})
+			if err != nil {
+				return util.Permanent(fmt.Errorf("job failed but failed to find pod: %w", err))
+			}
+			if len(pods.Items) < 1 {
+				return fmt.Errorf("job failed but pod does not exist")
+			}
+			lines, err := getPodLogLines(ctx, clientSet, pods.Items[0].Name, 1)
+			if err != nil {
+				return fmt.Errorf("job failed but could not get logs: %w", err)
+			}
+			if len(lines) > 0 {
+				return util.Permanent(fmt.Errorf("job failed, last log line: %s", lines[0]))
+			}
+			return util.Permanent(fmt.Errorf("job failed, empty log"))
+		}
+		if res.Status.Succeeded > 0 {
+			return nil
+		}
+		return fmt.Errorf("job still running")
+	})
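+	// Verify NodePort behavior: the service must be reachable on the
+	// internal IP of every node, dialed through the cluster's SOCKS proxy.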
+	util.TestEventual(t, "Start NodePort test setup", ctx, smallTestTimeout, func(ctx context.Context) error {
+		_, err := clientSet.AppsV1().Deployments("default").Create(ctx, makeHTTPServerDeploymentSpec("nodeport-server"), metav1.CreateOptions{})
+		if err != nil && !kerrors.IsAlreadyExists(err) {
+			return err
+		}
+		_, err = clientSet.CoreV1().Services("default").Create(ctx, makeHTTPServerNodePortService("nodeport-server"), metav1.CreateOptions{})
+		if err != nil && !kerrors.IsAlreadyExists(err) {
+			return err
+		}
+		return nil
+	})
+	util.TestEventual(t, "NodePort accessible from all nodes", ctx, smallTestTimeout, func(ctx context.Context) error {
+		nodes, err := clientSet.CoreV1().Nodes().List(ctx, metav1.ListOptions{})
+		if err != nil {
+			return err
+		}
+		// Use a new client for each attempt
+		hc := http.Client{
+			Timeout: 2 * time.Second,
+			Transport: &http.Transport{
+				Dial: cluster.SOCKSDialer.Dial,
+			},
+		}
+		for _, n := range nodes.Items {
+			var addr string
+			for _, a := range n.Status.Addresses {
+				if a.Type == corev1.NodeInternalIP {
+					addr = a.Address
+				}
+			}
+			u := url.URL{Scheme: "http", Host: addr, Path: "/"}
+			res, err := hc.Get(u.String())
+			if err != nil {
+				return fmt.Errorf("failed getting from node %q: %w", n.Name, err)
+			}
+			// Close the body immediately; only the status code matters here.
+			res.Body.Close()
+			if res.StatusCode != http.StatusOK {
+				return fmt.Errorf("getting from node %q: HTTP %d", n.Name, res.StatusCode)
+			}
+			t.Logf("Got response from %q", n.Name)
+		}
+		return nil
+	})
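+	// Fetch containerd metrics from a node's metrics endpoint,
+	// authenticating with the cluster owner certificate over TLS.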
+	util.TestEventual(t, "containerd metrics retrieved", ctx, smallTestTimeout, func(ctx context.Context) error {
+		pool := x509.NewCertPool()
+		pool.AddCert(cluster.CACertificate)
+		cl := http.Client{
+			Transport: &http.Transport{
+				TLSClientConfig: &tls.Config{
+					Certificates: []tls.Certificate{cluster.Owner},
+					RootCAs:      pool,
+				},
+				DialContext: func(ctx context.Context, _, addr string) (net.Conn, error) {
+					return cluster.DialNode(ctx, addr)
+				},
+			},
+		}
+		u := url.URL{
+			Scheme: "https",
+			Host:   net.JoinHostPort(cluster.NodeIDs[1], common.MetricsPort.PortString()),
+			Path:   "/metrics/containerd",
+		}
+		res, err := cl.Get(u.String())
+		if err != nil {
+			return err
+		}
+		defer res.Body.Close()
+		if res.StatusCode != http.StatusOK {
+			return fmt.Errorf("status code %d", res.StatusCode)
+		}
+
+		body, err := io.ReadAll(res.Body)
+		if err != nil {
+			return err
+		}
+		needle := "containerd_build_info_total"
+		if !strings.Contains(string(body), needle) {
+			return util.Permanent(fmt.Errorf("could not find %q in returned response", needle))
+		}
+		return nil
+	})
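+	// The VM smoke test needs nested virtualization; run it only when the
+	// environment advertises nested KVM support.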
+	if os.Getenv("HAVE_NESTED_KVM") != "" {
+		util.TestEventual(t, "Pod for KVM/QEMU smoke test", ctx, smallTestTimeout, func(ctx context.Context) error {
+			runcRuntimeClass := "runc"
+			_, err := clientSet.CoreV1().Pods("default").Create(ctx, &corev1.Pod{
+				ObjectMeta: metav1.ObjectMeta{
+					Name: "vm-smoketest",
+				},
+				Spec: corev1.PodSpec{
+					Containers: []corev1.Container{{
+						Name:            "vm-smoketest",
+						ImagePullPolicy: corev1.PullNever,
+						Image:           "test.monogon.internal/metropolis/vm/smoketest:smoketest_container",
+						Resources: corev1.ResourceRequirements{
+							Limits: corev1.ResourceList{
+								"devices.monogon.dev/kvm": *resource.NewQuantity(1, ""),
+							},
+						},
+					}},
+					RuntimeClassName: &runcRuntimeClass,
+					RestartPolicy:    corev1.RestartPolicyNever,
+				},
+			}, metav1.CreateOptions{})
+			return err
+		})
+		util.TestEventual(t, "KVM/QEMU smoke test completion", ctx, smallTestTimeout, func(ctx context.Context) error {
+			pod, err := clientSet.CoreV1().Pods("default").Get(ctx, "vm-smoketest", metav1.GetOptions{})
+			if err != nil {
+				return fmt.Errorf("failed to get pod: %w", err)
+			}
+			if pod.Status.Phase == corev1.PodSucceeded {
+				return nil
+			}
+			events, err := clientSet.CoreV1().Events("default").List(ctx, metav1.ListOptions{FieldSelector: fmt.Sprintf("involvedObject.name=%s,involvedObject.namespace=default", pod.Name)})
+			if err != nil || len(events.Items) == 0 {
+				return fmt.Errorf("pod is not ready: %v", pod.Status.Phase)
+			} else {
+				return fmt.Errorf("pod is not ready: %v", events.Items[len(events.Items)-1].Message)
+			}
+		})
+	}
+}