blob: ee39c1cbc8e6d024f90942a1f4e4cb0f479766ff [file] [log] [blame]
Tim Windelschmidt6d33a432025-02-04 14:34:25 +01001// Copyright The Monogon Project Authors.
2// SPDX-License-Identifier: Apache-2.0
3
Serge Bazanski99b02142024-04-17 16:33:28 +02004package kubernetes
5
6import (
7 "context"
8 "crypto/tls"
9 "crypto/x509"
10 "errors"
11 "fmt"
12 "io"
13 "net"
14 "net/http"
15 _ "net/http/pprof"
16 "net/url"
17 "os"
18 "strings"
19 "testing"
20 "time"
21
22 "github.com/bazelbuild/rules_go/go/runfiles"
Serge Bazanski1e399142024-10-22 10:58:15 +000023 "google.golang.org/protobuf/types/known/fieldmaskpb"
Serge Bazanski99b02142024-04-17 16:33:28 +020024 corev1 "k8s.io/api/core/v1"
Lorenz Brun52700ae2025-01-28 15:07:08 +010025 nwkv1 "k8s.io/api/networking/v1"
Serge Bazanski99b02142024-04-17 16:33:28 +020026 kerrors "k8s.io/apimachinery/pkg/api/errors"
Serge Bazanski99b02142024-04-17 16:33:28 +020027 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
Lorenz Brun52700ae2025-01-28 15:07:08 +010028 "k8s.io/apimachinery/pkg/util/intstr"
Serge Bazanski99b02142024-04-17 16:33:28 +020029 podv1 "k8s.io/kubernetes/pkg/api/v1/pod"
Lorenz Brun2ecccae2024-11-27 22:03:35 +010030 "k8s.io/utils/ptr"
Serge Bazanski99b02142024-04-17 16:33:28 +020031
Lorenz Brun732a8842024-08-26 23:25:37 +020032 common "source.monogon.dev/metropolis/node"
Serge Bazanski6d1ff362024-09-30 15:15:31 +000033 apb "source.monogon.dev/metropolis/proto/api"
Lorenz Brun732a8842024-08-26 23:25:37 +020034 cpb "source.monogon.dev/metropolis/proto/common"
Lorenz Brunde57e6f2025-01-08 16:34:08 +000035 "source.monogon.dev/metropolis/test/e2e/connectivity"
Tim Windelschmidt9f21f532024-05-07 15:14:20 +020036 mlaunch "source.monogon.dev/metropolis/test/launch"
37 "source.monogon.dev/metropolis/test/localregistry"
Serge Bazanski99b02142024-04-17 16:33:28 +020038 "source.monogon.dev/metropolis/test/util"
Serge Bazanski99b02142024-04-17 16:33:28 +020039)
40
var (
	// xTestImagesManifestPath is filled by bazel at link time (via x_defs)
	// with the canonical runfile path of the test images manifest. The init
	// function below resolves it with the rules_go runfiles package to an
	// actual on-disk path before any test runs.
	xTestImagesManifestPath string
)
47
48func init() {
49 var err error
50 for _, path := range []*string{
51 &xTestImagesManifestPath,
52 } {
53 *path, err = runfiles.Rlocation(*path)
54 if err != nil {
55 panic(err)
56 }
57 }
58}
59
const (
	// globalTestTimeout bounds the context shared by a whole test function.
	//
	// Bazel would eventually time out the test after 900s ("large") if, for
	// some reason, the context cancellation fails to abort it; this keeps us
	// comfortably under that limit.
	globalTestTimeout = 600 * time.Second

	// Timeouts for individual end-to-end test steps of different sizes.
	smallTestTimeout = 60 * time.Second
	largeTestTimeout = 120 * time.Second
)
71
Serge Bazanski6d1ff362024-09-30 15:15:31 +000072// TestE2EKubernetesLabels verifies that Kubernetes node labels are being updated
73// when the cluster state changes.
74func TestE2EKubernetesLabels(t *testing.T) {
75 ctx, cancel := context.WithTimeout(context.Background(), globalTestTimeout)
76 defer cancel()
77
78 clusterOptions := mlaunch.ClusterOptions{
79 NumNodes: 2,
80 InitialClusterConfiguration: &cpb.ClusterConfiguration{
Jan Schär39f4f5c2024-10-29 09:41:50 +010081 ClusterDomain: "cluster.test",
Serge Bazanski6d1ff362024-09-30 15:15:31 +000082 TpmMode: cpb.ClusterConfiguration_TPM_MODE_DISABLED,
83 StorageSecurityPolicy: cpb.ClusterConfiguration_STORAGE_SECURITY_POLICY_NEEDS_INSECURE,
Serge Bazanski78567602024-10-31 13:42:04 +000084 Kubernetes: &cpb.ClusterConfiguration_Kubernetes{
85 NodeLabelsToSynchronize: []*cpb.ClusterConfiguration_Kubernetes_NodeLabelsToSynchronize{
Serge Bazanskie99638e2024-09-30 17:06:44 +000086 {Regexp: `^test\.monogon\.dev/`},
87 },
88 },
Serge Bazanski6d1ff362024-09-30 15:15:31 +000089 },
90 }
91 cluster, err := mlaunch.LaunchCluster(ctx, clusterOptions)
92 if err != nil {
93 t.Fatalf("LaunchCluster failed: %v", err)
94 }
95 defer func() {
96 err := cluster.Close()
97 if err != nil {
98 t.Fatalf("cluster Close failed: %v", err)
99 }
100 }()
101
102 con, err := cluster.CuratorClient()
103 if err != nil {
104 t.Fatalf("Could not get curator client: %v", err)
105 }
106 mgmt := apb.NewManagementClient(con)
Lorenz Brun8f1254d2025-01-28 14:10:05 +0100107 clientSet, _, err := cluster.GetKubeClientSet()
Serge Bazanski6d1ff362024-09-30 15:15:31 +0000108 if err != nil {
109 t.Fatal(err)
110 }
111
112 getLabelsForNode := func(nid string) common.Labels {
113 node, err := clientSet.CoreV1().Nodes().Get(ctx, nid, metav1.GetOptions{})
114 if kerrors.IsNotFound(err) {
115 return nil
116 }
117 if err != nil {
118 t.Fatalf("Could not get node %s: %v", nid, err)
119 return nil
120 }
121 return common.Labels(node.Labels).Filter(func(k, v string) bool {
Serge Bazanskie99638e2024-09-30 17:06:44 +0000122 if strings.HasPrefix(k, "node-role.kubernetes.io/") {
123 return true
124 }
125 if strings.HasPrefix(k, "test.monogon.dev/") {
126 return true
127 }
128 return false
Serge Bazanski6d1ff362024-09-30 15:15:31 +0000129 })
130 }
131
132 // Nodes should have no labels at first.
133 for _, nid := range cluster.NodeIDs {
134 if labels := getLabelsForNode(nid); !labels.Equals(nil) {
135 t.Errorf("Node %s should have no labels, has %s", nid, labels)
136 }
137 }
138 // Nominate both nodes to be Kubernetes workers.
139 for _, nid := range cluster.NodeIDs {
Serge Bazanski6d1ff362024-09-30 15:15:31 +0000140 _, err := mgmt.UpdateNodeRoles(ctx, &apb.UpdateNodeRolesRequest{
141 Node: &apb.UpdateNodeRolesRequest_Id{
142 Id: nid,
143 },
Jan Schärd1a8b642024-12-03 17:40:41 +0100144 KubernetesWorker: ptr.To(true),
Serge Bazanski6d1ff362024-09-30 15:15:31 +0000145 })
146 if err != nil {
147 t.Fatalf("Could not make %s a KubernetesWorker: %v", nid, err)
148 }
149 }
150
Jan Schär36f03752024-11-19 17:41:05 +0100151 util.MustTestEventual(t, "Labels added", ctx, smallTestTimeout, func(ctx context.Context) error {
Serge Bazanski6d1ff362024-09-30 15:15:31 +0000152 // Nodes should have role labels now.
153 for _, nid := range cluster.NodeIDs {
154 want := common.Labels{
155 "node-role.kubernetes.io/KubernetesWorker": "",
156 }
157 if nid == cluster.NodeIDs[0] {
158 want["node-role.kubernetes.io/KubernetesController"] = ""
159 want["node-role.kubernetes.io/ConsensusMember"] = ""
160 }
161 if labels := getLabelsForNode(nid); !want.Equals(labels) {
162 return fmt.Errorf("node %s should have labels %s, has %s", nid, want, labels)
163 }
164 }
165 return nil
166 })
167
168 // Remove KubernetesWorker from first node again. It will stay in k8s (arguably,
169 // this is a bug) but its role label should be removed.
Serge Bazanski6d1ff362024-09-30 15:15:31 +0000170 _, err = mgmt.UpdateNodeRoles(ctx, &apb.UpdateNodeRolesRequest{
171 Node: &apb.UpdateNodeRolesRequest_Id{
172 Id: cluster.NodeIDs[0],
173 },
Jan Schärd1a8b642024-12-03 17:40:41 +0100174 KubernetesWorker: ptr.To(false),
Serge Bazanski6d1ff362024-09-30 15:15:31 +0000175 })
176 if err != nil {
177 t.Fatalf("Could not remove KubernetesWorker from %s: %v", cluster.NodeIDs[0], err)
178 }
179
Jan Schär36f03752024-11-19 17:41:05 +0100180 util.MustTestEventual(t, "Labels removed", ctx, smallTestTimeout, func(ctx context.Context) error {
Serge Bazanski6d1ff362024-09-30 15:15:31 +0000181 for _, nid := range cluster.NodeIDs {
182 want := make(common.Labels)
183 if nid == cluster.NodeIDs[0] {
184 want["node-role.kubernetes.io/KubernetesController"] = ""
185 want["node-role.kubernetes.io/ConsensusMember"] = ""
186 } else {
187 want["node-role.kubernetes.io/KubernetesWorker"] = ""
188 }
189 if labels := getLabelsForNode(nid); !want.Equals(labels) {
190 return fmt.Errorf("node %s should have labels %s, has %s", nid, want, labels)
191 }
192 }
193 return nil
194 })
Serge Bazanskie99638e2024-09-30 17:06:44 +0000195
196 // Add Metropolis node label, ensure it gets reflected on the Kubernetes node.
197 _, err = mgmt.UpdateNodeLabels(ctx, &apb.UpdateNodeLabelsRequest{
198 Node: &apb.UpdateNodeLabelsRequest_Id{
199 Id: cluster.NodeIDs[1],
200 },
201 Upsert: []*apb.UpdateNodeLabelsRequest_Pair{
202 {Key: "test.monogon.dev/foo", Value: "bar"},
203 },
204 })
205
Jan Schär36f03752024-11-19 17:41:05 +0100206 util.MustTestEventual(t, "Metropolis labels added", ctx, smallTestTimeout, func(ctx context.Context) error {
Serge Bazanskie99638e2024-09-30 17:06:44 +0000207 if err != nil {
208 t.Fatalf("Could not add label to node: %v", err)
209 }
210 want := common.Labels{
211 "node-role.kubernetes.io/KubernetesWorker": "",
212 "test.monogon.dev/foo": "bar",
213 }
214 if labels := getLabelsForNode(cluster.NodeIDs[1]); !want.Equals(labels) {
Serge Bazanski1e399142024-10-22 10:58:15 +0000215 return fmt.Errorf("node %s should have labels %s, has %s", cluster.NodeIDs[1], want, labels)
Serge Bazanskie99638e2024-09-30 17:06:44 +0000216 }
217 return nil
218 })
Serge Bazanski1e399142024-10-22 10:58:15 +0000219
220 // Reconfigure node label rules.
221 _, err = mgmt.ConfigureCluster(ctx, &apb.ConfigureClusterRequest{
222 BaseConfig: &cpb.ClusterConfiguration{
Serge Bazanski78567602024-10-31 13:42:04 +0000223 Kubernetes: &cpb.ClusterConfiguration_Kubernetes{
224 NodeLabelsToSynchronize: []*cpb.ClusterConfiguration_Kubernetes_NodeLabelsToSynchronize{
Serge Bazanski1e399142024-10-22 10:58:15 +0000225 {Regexp: `^test\.monogon\.dev/`},
226 },
227 },
228 },
229 NewConfig: &cpb.ClusterConfiguration{
Serge Bazanski78567602024-10-31 13:42:04 +0000230 Kubernetes: &cpb.ClusterConfiguration_Kubernetes{},
Serge Bazanski1e399142024-10-22 10:58:15 +0000231 },
232 UpdateMask: &fieldmaskpb.FieldMask{
Serge Bazanski78567602024-10-31 13:42:04 +0000233 Paths: []string{"kubernetes.node_labels_to_synchronize"},
Serge Bazanski1e399142024-10-22 10:58:15 +0000234 },
235 })
236 if err != nil {
237 t.Fatalf("Could not update cluster configuration: %v", err)
238 }
239
240 ci, err := mgmt.GetClusterInfo(ctx, &apb.GetClusterInfoRequest{})
241 if err != nil {
242 t.Fatalf("Could not get cluster info")
243 }
244 // See if the config changed.
Serge Bazanski78567602024-10-31 13:42:04 +0000245 if rules := ci.ClusterConfiguration.Kubernetes.NodeLabelsToSynchronize; len(rules) != 0 {
Serge Bazanski1e399142024-10-22 10:58:15 +0000246 t.Fatalf("Wanted 0 label rules in config after reconfiguration, have %d: %v", len(rules), rules)
247 }
248 // TODO: ensure new rules get applied, but that will require watching the cluster
249 // config for changes in the labelmaker.
Serge Bazanski6d1ff362024-09-30 15:15:31 +0000250}
251
Serge Bazanski99b02142024-04-17 16:33:28 +0200252// TestE2EKubernetes exercises the Kubernetes functionality of Metropolis.
253//
254// The tests are performed against an in-memory cluster.
255func TestE2EKubernetes(t *testing.T) {
256 // Set a global timeout to make sure this terminates
257 ctx, cancel := context.WithTimeout(context.Background(), globalTestTimeout)
258 defer cancel()
259
Tim Windelschmidt82e6af72024-07-23 00:05:42 +0000260 df, err := os.ReadFile(xTestImagesManifestPath)
Serge Bazanski99b02142024-04-17 16:33:28 +0200261 if err != nil {
262 t.Fatalf("Reading registry manifest failed: %v", err)
263 }
264 lr, err := localregistry.FromBazelManifest(df)
265 if err != nil {
266 t.Fatalf("Creating test image registry failed: %v", err)
267 }
268
269 // Launch cluster.
Tim Windelschmidt9f21f532024-05-07 15:14:20 +0200270 clusterOptions := mlaunch.ClusterOptions{
Serge Bazanski99b02142024-04-17 16:33:28 +0200271 NumNodes: 2,
272 LocalRegistry: lr,
Lorenz Brun732a8842024-08-26 23:25:37 +0200273 InitialClusterConfiguration: &cpb.ClusterConfiguration{
Jan Schär39f4f5c2024-10-29 09:41:50 +0100274 ClusterDomain: "cluster.test",
Lorenz Brun732a8842024-08-26 23:25:37 +0200275 TpmMode: cpb.ClusterConfiguration_TPM_MODE_DISABLED,
276 StorageSecurityPolicy: cpb.ClusterConfiguration_STORAGE_SECURITY_POLICY_NEEDS_INSECURE,
277 },
Serge Bazanski99b02142024-04-17 16:33:28 +0200278 }
Tim Windelschmidt9f21f532024-05-07 15:14:20 +0200279 cluster, err := mlaunch.LaunchCluster(ctx, clusterOptions)
Serge Bazanski99b02142024-04-17 16:33:28 +0200280 if err != nil {
281 t.Fatalf("LaunchCluster failed: %v", err)
282 }
283 defer func() {
284 err := cluster.Close()
285 if err != nil {
286 t.Fatalf("cluster Close failed: %v", err)
287 }
288 }()
289
Lorenz Brunde57e6f2025-01-08 16:34:08 +0000290 clientSet, restConfig, err := cluster.GetKubeClientSet()
Serge Bazanski99b02142024-04-17 16:33:28 +0200291 if err != nil {
292 t.Fatal(err)
293 }
294 util.TestEventual(t, "Add KubernetesWorker roles", ctx, smallTestTimeout, func(ctx context.Context) error {
295 // Make everything but the first node into KubernetesWorkers.
296 for i := 1; i < clusterOptions.NumNodes; i++ {
297 err := cluster.MakeKubernetesWorker(ctx, cluster.NodeIDs[i])
298 if err != nil {
299 return util.Permanent(fmt.Errorf("MakeKubernetesWorker: %w", err))
300 }
301 }
302 return nil
303 })
304 util.TestEventual(t, "Node is registered and ready", ctx, largeTestTimeout, func(ctx context.Context) error {
305 nodes, err := clientSet.CoreV1().Nodes().List(ctx, metav1.ListOptions{})
306 if err != nil {
307 return err
308 }
309 if len(nodes.Items) < 1 {
310 return errors.New("node not yet registered")
311 }
312 node := nodes.Items[0]
313 for _, cond := range node.Status.Conditions {
314 if cond.Type != corev1.NodeReady {
315 continue
316 }
317 if cond.Status != corev1.ConditionTrue {
318 return fmt.Errorf("node not ready: %v", cond.Message)
319 }
320 }
321 return nil
322 })
323 util.TestEventual(t, "Simple deployment", ctx, largeTestTimeout, func(ctx context.Context) error {
324 _, err := clientSet.AppsV1().Deployments("default").Create(ctx, makeTestDeploymentSpec("test-deploy-1"), metav1.CreateOptions{})
325 return err
326 })
327 util.TestEventual(t, "Simple deployment is running", ctx, largeTestTimeout, func(ctx context.Context) error {
328 res, err := clientSet.CoreV1().Pods("default").List(ctx, metav1.ListOptions{LabelSelector: "name=test-deploy-1"})
329 if err != nil {
330 return err
331 }
332 if len(res.Items) == 0 {
333 return errors.New("pod didn't get created")
334 }
335 pod := res.Items[0]
336 if podv1.IsPodAvailable(&pod, 1, metav1.NewTime(time.Now())) {
337 return nil
338 }
339 events, err := clientSet.CoreV1().Events("default").List(ctx, metav1.ListOptions{FieldSelector: fmt.Sprintf("involvedObject.name=%s,involvedObject.namespace=default", pod.Name)})
340 if err != nil || len(events.Items) == 0 {
341 return fmt.Errorf("pod is not ready: %v", pod.Status.Phase)
342 } else {
343 return fmt.Errorf("pod is not ready: %v", events.Items[0].Message)
344 }
345 })
346 util.TestEventual(t, "Simple deployment with gvisor", ctx, largeTestTimeout, func(ctx context.Context) error {
347 deployment := makeTestDeploymentSpec("test-deploy-2")
348 gvisorStr := "gvisor"
349 deployment.Spec.Template.Spec.RuntimeClassName = &gvisorStr
350 _, err := clientSet.AppsV1().Deployments("default").Create(ctx, deployment, metav1.CreateOptions{})
351 return err
352 })
353 util.TestEventual(t, "Simple deployment is running on gvisor", ctx, largeTestTimeout, func(ctx context.Context) error {
354 res, err := clientSet.CoreV1().Pods("default").List(ctx, metav1.ListOptions{LabelSelector: "name=test-deploy-2"})
355 if err != nil {
356 return err
357 }
358 if len(res.Items) == 0 {
359 return errors.New("pod didn't get created")
360 }
361 pod := res.Items[0]
362 if podv1.IsPodAvailable(&pod, 1, metav1.NewTime(time.Now())) {
363 return nil
364 }
365 events, err := clientSet.CoreV1().Events("default").List(ctx, metav1.ListOptions{FieldSelector: fmt.Sprintf("involvedObject.name=%s,involvedObject.namespace=default", pod.Name)})
366 if err != nil || len(events.Items) == 0 {
367 return fmt.Errorf("pod is not ready: %v", pod.Status.Phase)
368 } else {
369 var errorMsg strings.Builder
370 for _, msg := range events.Items {
371 errorMsg.WriteString(" | ")
372 errorMsg.WriteString(msg.Message)
373 }
Tim Windelschmidt5f1a7de2024-09-19 02:00:14 +0200374 return fmt.Errorf("pod is not ready: %s", errorMsg.String())
Serge Bazanski99b02142024-04-17 16:33:28 +0200375 }
376 })
Lorenz Brunde57e6f2025-01-08 16:34:08 +0000377 t.Run("Connectivity Smoke Tests", func(t *testing.T) {
378 ct := connectivity.SetupTest(t, &connectivity.TestSpec{
379 Name: "connectivity-smoke",
380 ClientSet: clientSet,
381 RESTConfig: restConfig,
382 NumPods: 2,
383 ExtraPodConfig: func(i int, pod *corev1.Pod) {
384 // Spread pods out over nodes to test inter-node network
385 pod.Labels = make(map[string]string)
386 pod.Labels["name"] = "connectivity-smoketest"
387 pod.Spec.TopologySpreadConstraints = []corev1.TopologySpreadConstraint{{
388 MaxSkew: 1,
389 TopologyKey: "kubernetes.io/hostname",
390 WhenUnsatisfiable: corev1.DoNotSchedule,
391 LabelSelector: metav1.SetAsLabelSelector(pod.Labels),
392 }}
393 },
394 })
395 ct.TestPodConnectivity(t, 0, 1, 1234, connectivity.ExpectedSuccess)
396 })
Lorenz Brun52700ae2025-01-28 15:07:08 +0100397 t.Run("Network Policy Smoke Test", func(t *testing.T) {
398 ct := connectivity.SetupTest(t, &connectivity.TestSpec{
399 Name: "npc-smoke",
400 ClientSet: clientSet,
401 RESTConfig: restConfig,
402 NumPods: 2,
403 ExtraPodConfig: func(i int, pod *corev1.Pod) {
404 // Spread pods out over nodes to test inter-node network
405 pod.Labels = make(map[string]string)
406 pod.Labels["name"] = "npc-smoke"
407 pod.Spec.TopologySpreadConstraints = []corev1.TopologySpreadConstraint{{
408 MaxSkew: 1,
409 TopologyKey: "kubernetes.io/hostname",
410 WhenUnsatisfiable: corev1.DoNotSchedule,
411 LabelSelector: metav1.SetAsLabelSelector(pod.Labels),
412 }}
413 },
414 })
415 // Test connectivity before applying network policy
416 ct.TestPodConnectivity(t, 0, 1, 1234, connectivity.ExpectedSuccess)
417 ct.TestPodConnectivity(t, 0, 1, 1235, connectivity.ExpectedSuccess)
418 nwp := &nwkv1.NetworkPolicy{
419 ObjectMeta: metav1.ObjectMeta{
420 Name: "npc-smoke",
421 },
422 Spec: nwkv1.NetworkPolicySpec{
423 PodSelector: metav1.LabelSelector{MatchLabels: map[string]string{"name": "npc-smoke"}},
424 Ingress: []nwkv1.NetworkPolicyIngressRule{{
425 Ports: []nwkv1.NetworkPolicyPort{{
426 Protocol: ptr.To(corev1.ProtocolTCP),
427 Port: &intstr.IntOrString{Type: intstr.Int, IntVal: 1234},
428 }},
429 From: []nwkv1.NetworkPolicyPeer{{
430 PodSelector: &metav1.LabelSelector{MatchLabels: map[string]string{"name": "npc-smoke"}},
431 }},
432 }},
433 },
434 }
435 if _, err := clientSet.NetworkingV1().NetworkPolicies("default").Create(context.Background(), nwp, metav1.CreateOptions{}); err != nil {
436 t.Fatal(err)
437 }
438 // Check if policy is in effect
439 ct.TestPodConnectivityEventual(t, 0, 1, 1235, connectivity.ExpectedReject, 30*time.Second)
440 ct.TestPodConnectivity(t, 0, 1, 1234, connectivity.ExpectedSuccess)
441 })
Jan Schär73beb692024-11-27 17:47:09 +0100442 for _, runtimeClass := range []string{"runc", "gvisor"} {
443 statefulSetName := fmt.Sprintf("test-statefulset-%s", runtimeClass)
444 util.TestEventual(t, fmt.Sprintf("StatefulSet with %s tests", runtimeClass), ctx, smallTestTimeout, func(ctx context.Context) error {
445 _, err := clientSet.AppsV1().StatefulSets("default").Create(ctx, makeTestStatefulSet(statefulSetName, runtimeClass), metav1.CreateOptions{})
Serge Bazanski99b02142024-04-17 16:33:28 +0200446 return err
Jan Schär73beb692024-11-27 17:47:09 +0100447 })
448 util.TestEventual(t, fmt.Sprintf("StatefulSet with %s tests successful", runtimeClass), ctx, smallTestTimeout, func(ctx context.Context) error {
449 res, err := clientSet.CoreV1().Pods("default").List(ctx, metav1.ListOptions{LabelSelector: fmt.Sprintf("name=%s", statefulSetName)})
450 if err != nil {
451 return err
Jan Schär652c2ad2024-11-19 17:40:50 +0100452 }
Jan Schär73beb692024-11-27 17:47:09 +0100453 if len(res.Items) == 0 {
454 return errors.New("pod didn't get created")
455 }
456 pod := res.Items[0]
457 lines, err := getPodLogLines(ctx, clientSet, pod.Name, 50)
458 if err != nil {
459 return fmt.Errorf("could not get logs: %w", err)
460 }
461 if len(lines) > 0 {
462 switch lines[len(lines)-1] {
463 case "[TESTS-PASSED]":
464 return nil
465 case "[TESTS-FAILED]":
466 return util.Permanent(fmt.Errorf("tests failed, log:\n %s", strings.Join(lines, "\n ")))
467 }
468 }
469 return fmt.Errorf("pod is not ready: %v, log:\n %s", pod.Status.Phase, strings.Join(lines, "\n "))
470 })
471 }
Lorenz Brun2ecccae2024-11-27 22:03:35 +0100472 util.TestEventual(t, "Deployment in user namespace", ctx, largeTestTimeout, func(ctx context.Context) error {
473 deployment := makeTestDeploymentSpec("test-userns-1")
474 deployment.Spec.Template.Spec.HostUsers = ptr.To(false)
475 deployment.Spec.Template.Spec.Containers[0].ReadinessProbe.HTTPGet.Path = "/ready_userns"
476 _, err := clientSet.AppsV1().Deployments("default").Create(ctx, deployment, metav1.CreateOptions{})
477 return err
478 })
479 util.TestEventual(t, "Deployment in user namespace is running", ctx, largeTestTimeout, func(ctx context.Context) error {
480 res, err := clientSet.CoreV1().Pods("default").List(ctx, metav1.ListOptions{LabelSelector: "name=test-userns-1"})
481 if err != nil {
482 return err
483 }
484 if len(res.Items) == 0 {
485 return errors.New("pod didn't get created")
486 }
487 pod := res.Items[0]
488 if podv1.IsPodAvailable(&pod, 1, metav1.NewTime(time.Now())) {
489 return nil
490 }
491 events, err := clientSet.CoreV1().Events("default").List(ctx, metav1.ListOptions{FieldSelector: fmt.Sprintf("involvedObject.name=%s,involvedObject.namespace=default", pod.Name)})
492 if err != nil || len(events.Items) == 0 {
493 return fmt.Errorf("pod is not ready: %v", pod.Status.Phase)
494 } else {
495 return fmt.Errorf("pod is not ready: %v", events.Items[0].Message)
496 }
497 })
Serge Bazanski99b02142024-04-17 16:33:28 +0200498 util.TestEventual(t, "In-cluster self-test job", ctx, smallTestTimeout, func(ctx context.Context) error {
499 _, err := clientSet.BatchV1().Jobs("default").Create(ctx, makeSelftestSpec("selftest"), metav1.CreateOptions{})
500 return err
501 })
502 util.TestEventual(t, "In-cluster self-test job passed", ctx, smallTestTimeout, func(ctx context.Context) error {
503 res, err := clientSet.BatchV1().Jobs("default").Get(ctx, "selftest", metav1.GetOptions{})
504 if err != nil {
505 return err
506 }
507 if res.Status.Failed > 0 {
508 pods, err := clientSet.CoreV1().Pods("default").List(ctx, metav1.ListOptions{
509 LabelSelector: "job-name=selftest",
510 })
511 if err != nil {
512 return util.Permanent(fmt.Errorf("job failed but failed to find pod: %w", err))
513 }
514 if len(pods.Items) < 1 {
515 return fmt.Errorf("job failed but pod does not exist")
516 }
517 lines, err := getPodLogLines(ctx, clientSet, pods.Items[0].Name, 1)
518 if err != nil {
519 return fmt.Errorf("job failed but could not get logs: %w", err)
520 }
521 if len(lines) > 0 {
522 return util.Permanent(fmt.Errorf("job failed, last log line: %s", lines[0]))
523 }
524 return util.Permanent(fmt.Errorf("job failed, empty log"))
525 }
526 if res.Status.Succeeded > 0 {
527 return nil
528 }
529 return fmt.Errorf("job still running")
530 })
531 util.TestEventual(t, "Start NodePort test setup", ctx, smallTestTimeout, func(ctx context.Context) error {
532 _, err := clientSet.AppsV1().Deployments("default").Create(ctx, makeHTTPServerDeploymentSpec("nodeport-server"), metav1.CreateOptions{})
533 if err != nil && !kerrors.IsAlreadyExists(err) {
534 return err
535 }
536 _, err = clientSet.CoreV1().Services("default").Create(ctx, makeHTTPServerNodePortService("nodeport-server"), metav1.CreateOptions{})
537 if err != nil && !kerrors.IsAlreadyExists(err) {
538 return err
539 }
540 return nil
541 })
542 util.TestEventual(t, "NodePort accessible from all nodes", ctx, smallTestTimeout, func(ctx context.Context) error {
543 nodes, err := clientSet.CoreV1().Nodes().List(ctx, metav1.ListOptions{})
544 if err != nil {
545 return err
546 }
547 // Use a new client for each attempt
548 hc := http.Client{
549 Timeout: 2 * time.Second,
550 Transport: &http.Transport{
551 Dial: cluster.SOCKSDialer.Dial,
552 },
553 }
554 for _, n := range nodes.Items {
555 var addr string
556 for _, a := range n.Status.Addresses {
557 if a.Type == corev1.NodeInternalIP {
558 addr = a.Address
559 }
560 }
561 u := url.URL{Scheme: "http", Host: addr, Path: "/"}
562 res, err := hc.Get(u.String())
563 if err != nil {
564 return fmt.Errorf("failed getting from node %q: %w", n.Name, err)
565 }
566 if res.StatusCode != http.StatusOK {
567 return fmt.Errorf("getting from node %q: HTTP %d", n.Name, res.StatusCode)
568 }
569 t.Logf("Got response from %q", n.Name)
570 }
571 return nil
572 })
573 util.TestEventual(t, "containerd metrics retrieved", ctx, smallTestTimeout, func(ctx context.Context) error {
574 pool := x509.NewCertPool()
575 pool.AddCert(cluster.CACertificate)
576 cl := http.Client{
577 Transport: &http.Transport{
578 TLSClientConfig: &tls.Config{
579 Certificates: []tls.Certificate{cluster.Owner},
580 RootCAs: pool,
581 },
582 DialContext: func(ctx context.Context, _, addr string) (net.Conn, error) {
583 return cluster.DialNode(ctx, addr)
584 },
585 },
586 }
587 u := url.URL{
588 Scheme: "https",
589 Host: net.JoinHostPort(cluster.NodeIDs[1], common.MetricsPort.PortString()),
590 Path: "/metrics/containerd",
591 }
592 res, err := cl.Get(u.String())
593 if err != nil {
594 return err
595 }
596 defer res.Body.Close()
597 if res.StatusCode != 200 {
598 return fmt.Errorf("status code %d", res.StatusCode)
599 }
600
601 body, err := io.ReadAll(res.Body)
602 if err != nil {
603 return err
604 }
605 needle := "containerd_build_info_total"
606 if !strings.Contains(string(body), needle) {
607 return util.Permanent(fmt.Errorf("could not find %q in returned response", needle))
608 }
609 return nil
610 })
Serge Bazanski99b02142024-04-17 16:33:28 +0200611}