// Copyright The Monogon Project Authors.
// SPDX-License-Identifier: Apache-2.0

package kubernetes

import (
	"context"
	"crypto/tls"
	"crypto/x509"
	"errors"
	"fmt"
	"io"
	"net"
	"net/http"
	_ "net/http/pprof"
	"net/url"
	"os"
	"slices"
	"strings"
	"testing"
	"time"

	"github.com/bazelbuild/rules_go/go/runfiles"
	"google.golang.org/protobuf/types/known/fieldmaskpb"
	corev1 "k8s.io/api/core/v1"
	nwkv1 "k8s.io/api/networking/v1"
	kerrors "k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/types"
	"k8s.io/apimachinery/pkg/util/intstr"
	podv1 "k8s.io/kubernetes/pkg/api/v1/pod"
	"k8s.io/utils/ptr"

	common "source.monogon.dev/metropolis/node"
	apb "source.monogon.dev/metropolis/proto/api"
	cpb "source.monogon.dev/metropolis/proto/common"
	"source.monogon.dev/metropolis/test/e2e/connectivity"
	mlaunch "source.monogon.dev/metropolis/test/launch"
	"source.monogon.dev/metropolis/test/localregistry"
	"source.monogon.dev/metropolis/test/util"
)

var (
	// These are filled by Bazel at link time with the canonical path of
	// their corresponding file. Inside the init function we resolve them
	// with the rules_go runfiles package to their real paths.
	xTestImagesManifestPath string
)

func init() {
	var err error
	for _, path := range []*string{
		&xTestImagesManifestPath,
	} {
		*path, err = runfiles.Rlocation(*path)
		if err != nil {
			panic(err)
		}
	}
}
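
// The link-time value for xTestImagesManifestPath is expected to come from the
// test's Bazel rule. A minimal sketch of such wiring, assuming standard
// rules_go x_defs (target and attribute values here are illustrative, not
// taken from this repository):
//
//	go_test(
//	    name = "kubernetes_test",
//	    ...
//	    x_defs = {
//	        "xTestImagesManifestPath": "$(rlocationpath :test_images_manifest)",
//	    },
//	)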

const (
	// Timeout for the global test context.
	//
	// Bazel would eventually time out the test after 900s ("large") if, for
	// some reason, the context cancellation fails to abort it.
	globalTestTimeout = 600 * time.Second

	// Timeouts for individual end-to-end tests of different sizes.
	smallTestTimeout = 60 * time.Second
	largeTestTimeout = 120 * time.Second
)

// TestE2EKubernetesLabels verifies that Kubernetes node labels are being updated
// when the cluster state changes.
func TestE2EKubernetesLabels(t *testing.T) {
	ctx, cancel := context.WithTimeout(context.Background(), globalTestTimeout)
	defer cancel()

	clusterOptions := mlaunch.ClusterOptions{
		NumNodes: 2,
		InitialClusterConfiguration: &cpb.ClusterConfiguration{
			ClusterDomain: "cluster.test",
			TpmMode: cpb.ClusterConfiguration_TPM_MODE_DISABLED,
			StorageSecurityPolicy: cpb.ClusterConfiguration_STORAGE_SECURITY_POLICY_NEEDS_INSECURE,
			Kubernetes: &cpb.ClusterConfiguration_Kubernetes{
				NodeLabelsToSynchronize: []*cpb.ClusterConfiguration_Kubernetes_NodeLabelsToSynchronize{
					{Regexp: `^test\.monogon\.dev/`},
				},
			},
		},
	}
	cluster, err := mlaunch.LaunchCluster(ctx, clusterOptions)
	if err != nil {
		t.Fatalf("LaunchCluster failed: %v", err)
	}
	defer func() {
		err := cluster.Close()
		if err != nil {
			t.Fatalf("cluster Close failed: %v", err)
		}
	}()

	con, err := cluster.CuratorClient()
	if err != nil {
		t.Fatalf("Could not get curator client: %v", err)
	}
	mgmt := apb.NewManagementClient(con)
	clientSet, _, err := cluster.GetKubeClientSet()
	if err != nil {
		t.Fatal(err)
	}

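	// getLabelsForNode returns the labels currently set on the given Kubernetes
	// node, filtered down to role labels and test labels so the comparisons below
	// are not affected by unrelated well-known labels. It returns nil if the node
	// object does not exist (yet).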
	getLabelsForNode := func(nid string) common.Labels {
		node, err := clientSet.CoreV1().Nodes().Get(ctx, nid, metav1.GetOptions{})
		if kerrors.IsNotFound(err) {
			return nil
		}
		if err != nil {
			t.Fatalf("Could not get node %s: %v", nid, err)
			return nil
		}
		return common.Labels(node.Labels).Filter(func(k, v string) bool {
			if strings.HasPrefix(k, "node-role.kubernetes.io/") {
				return true
			}
			if strings.HasPrefix(k, "test.monogon.dev/") {
				return true
			}
			return false
		})
	}

	// Nodes should have no labels at first.
	for _, nid := range cluster.NodeIDs {
		if labels := getLabelsForNode(nid); !labels.Equals(nil) {
			t.Errorf("Node %s should have no labels, has %s", nid, labels)
		}
	}
	// Nominate both nodes to be Kubernetes workers.
	for _, nid := range cluster.NodeIDs {
		_, err := mgmt.UpdateNodeRoles(ctx, &apb.UpdateNodeRolesRequest{
			Node: &apb.UpdateNodeRolesRequest_Id{
				Id: nid,
			},
			KubernetesWorker: ptr.To(true),
		})
		if err != nil {
			t.Fatalf("Could not make %s a KubernetesWorker: %v", nid, err)
		}
	}

	util.MustTestEventual(t, "Labels added", ctx, smallTestTimeout, func(ctx context.Context) error {
		// Nodes should have role labels now.
		for _, nid := range cluster.NodeIDs {
			want := common.Labels{
				"node-role.kubernetes.io/KubernetesWorker": "",
			}
			if nid == cluster.NodeIDs[0] {
				want["node-role.kubernetes.io/KubernetesController"] = ""
				want["node-role.kubernetes.io/ConsensusMember"] = ""
			}
			if labels := getLabelsForNode(nid); !want.Equals(labels) {
				return fmt.Errorf("node %s should have labels %s, has %s", nid, want, labels)
			}
		}
		return nil
	})

	// Remove KubernetesWorker from the first node again. It will stay in k8s
	// (arguably, this is a bug) but its role label should be removed.
	_, err = mgmt.UpdateNodeRoles(ctx, &apb.UpdateNodeRolesRequest{
		Node: &apb.UpdateNodeRolesRequest_Id{
			Id: cluster.NodeIDs[0],
		},
		KubernetesWorker: ptr.To(false),
	})
	if err != nil {
		t.Fatalf("Could not remove KubernetesWorker from %s: %v", cluster.NodeIDs[0], err)
	}

	util.MustTestEventual(t, "Labels removed", ctx, smallTestTimeout, func(ctx context.Context) error {
		for _, nid := range cluster.NodeIDs {
			want := make(common.Labels)
			if nid == cluster.NodeIDs[0] {
				want["node-role.kubernetes.io/KubernetesController"] = ""
				want["node-role.kubernetes.io/ConsensusMember"] = ""
			} else {
				want["node-role.kubernetes.io/KubernetesWorker"] = ""
			}
			if labels := getLabelsForNode(nid); !want.Equals(labels) {
				return fmt.Errorf("node %s should have labels %s, has %s", nid, want, labels)
			}
		}
		return nil
	})

	// Add a Metropolis node label, ensure it gets reflected on the Kubernetes node.
	_, err = mgmt.UpdateNodeLabels(ctx, &apb.UpdateNodeLabelsRequest{
		Node: &apb.UpdateNodeLabelsRequest_Id{
			Id: cluster.NodeIDs[1],
		},
		Upsert: []*apb.UpdateNodeLabelsRequest_Pair{
			{Key: "test.monogon.dev/foo", Value: "bar"},
		},
	})

	util.MustTestEventual(t, "Metropolis labels added", ctx, smallTestTimeout, func(ctx context.Context) error {
		if err != nil {
			t.Fatalf("Could not add label to node: %v", err)
		}
		want := common.Labels{
			"node-role.kubernetes.io/KubernetesWorker": "",
			"test.monogon.dev/foo": "bar",
		}
		if labels := getLabelsForNode(cluster.NodeIDs[1]); !want.Equals(labels) {
			return fmt.Errorf("node %s should have labels %s, has %s", cluster.NodeIDs[1], want, labels)
		}
		return nil
	})

	// Reconfigure node label rules.
	_, err = mgmt.ConfigureCluster(ctx, &apb.ConfigureClusterRequest{
		BaseConfig: &cpb.ClusterConfiguration{
			Kubernetes: &cpb.ClusterConfiguration_Kubernetes{
				NodeLabelsToSynchronize: []*cpb.ClusterConfiguration_Kubernetes_NodeLabelsToSynchronize{
					{Regexp: `^test\.monogon\.dev/`},
				},
			},
		},
		NewConfig: &cpb.ClusterConfiguration{
			Kubernetes: &cpb.ClusterConfiguration_Kubernetes{},
		},
		UpdateMask: &fieldmaskpb.FieldMask{
			Paths: []string{"kubernetes.node_labels_to_synchronize"},
		},
	})
	if err != nil {
		t.Fatalf("Could not update cluster configuration: %v", err)
	}

	ci, err := mgmt.GetClusterInfo(ctx, &apb.GetClusterInfoRequest{})
	if err != nil {
		t.Fatalf("Could not get cluster info: %v", err)
	}
	// See if the config changed.
	if rules := ci.ClusterConfiguration.Kubernetes.NodeLabelsToSynchronize; len(rules) != 0 {
		t.Fatalf("Wanted 0 label rules in config after reconfiguration, have %d: %v", len(rules), rules)
	}
	// TODO: ensure new rules get applied, but that will require watching the cluster
	// config for changes in the labelmaker.
}

// TestE2EKubernetes exercises the Kubernetes functionality of Metropolis.
//
// The tests are performed against an in-memory cluster.
func TestE2EKubernetes(t *testing.T) {
	// Set a global timeout to make sure this terminates.
	ctx, cancel := context.WithTimeout(context.Background(), globalTestTimeout)
	defer cancel()

	df, err := os.ReadFile(xTestImagesManifestPath)
	if err != nil {
		t.Fatalf("Reading registry manifest failed: %v", err)
	}
	lr, err := localregistry.FromBazelManifest(df)
	if err != nil {
		t.Fatalf("Creating test image registry failed: %v", err)
	}

	// Launch cluster.
	clusterOptions := mlaunch.ClusterOptions{
		NumNodes: 2,
		LocalRegistry: lr,
		InitialClusterConfiguration: &cpb.ClusterConfiguration{
			ClusterDomain: "cluster.test",
			TpmMode: cpb.ClusterConfiguration_TPM_MODE_DISABLED,
			StorageSecurityPolicy: cpb.ClusterConfiguration_STORAGE_SECURITY_POLICY_NEEDS_INSECURE,
		},
	}
	cluster, err := mlaunch.LaunchCluster(ctx, clusterOptions)
	if err != nil {
		t.Fatalf("LaunchCluster failed: %v", err)
	}
	defer func() {
		err := cluster.Close()
		if err != nil {
			t.Fatalf("cluster Close failed: %v", err)
		}
	}()

	clientSet, restConfig, err := cluster.GetKubeClientSet()
	if err != nil {
		t.Fatal(err)
	}
	util.TestEventual(t, "Add KubernetesWorker roles", ctx, smallTestTimeout, func(ctx context.Context) error {
		// Make everything but the first node into KubernetesWorkers.
		for i := 1; i < clusterOptions.NumNodes; i++ {
			err := cluster.MakeKubernetesWorker(ctx, cluster.NodeIDs[i])
			if err != nil {
				return util.Permanent(fmt.Errorf("MakeKubernetesWorker: %w", err))
			}
		}
		return nil
	})
	util.TestEventual(t, "Node is registered and ready", ctx, largeTestTimeout, func(ctx context.Context) error {
		nodes, err := clientSet.CoreV1().Nodes().List(ctx, metav1.ListOptions{})
		if err != nil {
			return err
		}
		if len(nodes.Items) < 1 {
			return errors.New("node not yet registered")
		}
		node := nodes.Items[0]
		for _, cond := range node.Status.Conditions {
			if cond.Type != corev1.NodeReady {
				continue
			}
			if cond.Status != corev1.ConditionTrue {
				return fmt.Errorf("node not ready: %v", cond.Message)
			}
		}
		return nil
	})
	util.TestEventual(t, "Simple deployment", ctx, largeTestTimeout, func(ctx context.Context) error {
		_, err := clientSet.AppsV1().Deployments("default").Create(ctx, makeTestDeploymentSpec("test-deploy-1"), metav1.CreateOptions{})
		return err
	})
	util.TestEventual(t, "Simple deployment is running", ctx, largeTestTimeout, func(ctx context.Context) error {
		res, err := clientSet.CoreV1().Pods("default").List(ctx, metav1.ListOptions{LabelSelector: "name=test-deploy-1"})
		if err != nil {
			return err
		}
		if len(res.Items) == 0 {
			return errors.New("pod didn't get created")
		}
		pod := res.Items[0]
		if podv1.IsPodAvailable(&pod, 1, metav1.NewTime(time.Now())) {
			return nil
		}
		events, err := clientSet.CoreV1().Events("default").List(ctx, metav1.ListOptions{FieldSelector: fmt.Sprintf("involvedObject.name=%s,involvedObject.namespace=default", pod.Name)})
		if err != nil || len(events.Items) == 0 {
			return fmt.Errorf("pod is not ready: %v", pod.Status.Phase)
		} else {
			return fmt.Errorf("pod is not ready: %v", events.Items[0].Message)
		}
	})
	util.TestEventual(t, "Simple deployment with gvisor", ctx, largeTestTimeout, func(ctx context.Context) error {
		deployment := makeTestDeploymentSpec("test-deploy-2")
		gvisorStr := "gvisor"
		deployment.Spec.Template.Spec.RuntimeClassName = &gvisorStr
		_, err := clientSet.AppsV1().Deployments("default").Create(ctx, deployment, metav1.CreateOptions{})
		return err
	})
	util.TestEventual(t, "Simple deployment is running on gvisor", ctx, largeTestTimeout, func(ctx context.Context) error {
		res, err := clientSet.CoreV1().Pods("default").List(ctx, metav1.ListOptions{LabelSelector: "name=test-deploy-2"})
		if err != nil {
			return err
		}
		if len(res.Items) == 0 {
			return errors.New("pod didn't get created")
		}
		pod := res.Items[0]
		if podv1.IsPodAvailable(&pod, 1, metav1.NewTime(time.Now())) {
			return nil
		}
		events, err := clientSet.CoreV1().Events("default").List(ctx, metav1.ListOptions{FieldSelector: fmt.Sprintf("involvedObject.name=%s,involvedObject.namespace=default", pod.Name)})
		if err != nil || len(events.Items) == 0 {
			return fmt.Errorf("pod is not ready: %v", pod.Status.Phase)
		} else {
			var errorMsg strings.Builder
			for _, msg := range events.Items {
				errorMsg.WriteString(" | ")
				errorMsg.WriteString(msg.Message)
			}
			return fmt.Errorf("pod is not ready: %s", errorMsg.String())
		}
	})
	t.Run("Connectivity Smoke Tests", func(t *testing.T) {
		ct := connectivity.SetupTest(t, &connectivity.TestSpec{
			Name: "connectivity-smoke",
			ClientSet: clientSet,
			RESTConfig: restConfig,
			NumPods: 2,
			ExtraPodConfig: func(i int, pod *corev1.Pod) {
				// Spread pods out over nodes to test inter-node network
				pod.Labels = make(map[string]string)
				pod.Labels["name"] = "connectivity-smoketest"
				pod.Spec.TopologySpreadConstraints = []corev1.TopologySpreadConstraint{{
					MaxSkew: 1,
					TopologyKey: "kubernetes.io/hostname",
					WhenUnsatisfiable: corev1.DoNotSchedule,
					LabelSelector: metav1.SetAsLabelSelector(pod.Labels),
				}}
			},
		})
		ct.TestPodConnectivity(t, 0, 1, 1234, connectivity.ExpectedSuccess)
	})
	t.Run("Network Policy Smoke Test", func(t *testing.T) {
		ct := connectivity.SetupTest(t, &connectivity.TestSpec{
			Name: "npc-smoke",
			ClientSet: clientSet,
			RESTConfig: restConfig,
			NumPods: 2,
			ExtraPodConfig: func(i int, pod *corev1.Pod) {
				// Spread pods out over nodes to test inter-node network
				pod.Labels = make(map[string]string)
				pod.Labels["name"] = "npc-smoke"
				pod.Spec.TopologySpreadConstraints = []corev1.TopologySpreadConstraint{{
					MaxSkew: 1,
					TopologyKey: "kubernetes.io/hostname",
					WhenUnsatisfiable: corev1.DoNotSchedule,
					LabelSelector: metav1.SetAsLabelSelector(pod.Labels),
				}}
			},
		})
		// Test connectivity before applying network policy
		ct.TestPodConnectivity(t, 0, 1, 1234, connectivity.ExpectedSuccess)
		ct.TestPodConnectivity(t, 0, 1, 1235, connectivity.ExpectedSuccess)
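		// The policy below admits ingress to these pods only on TCP port 1234 and
		// only from pods carrying the same "name: npc-smoke" label, so the probe
		// on port 1235 is expected to start failing once the policy takes effect.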
		nwp := &nwkv1.NetworkPolicy{
			ObjectMeta: metav1.ObjectMeta{
				Name: "npc-smoke",
			},
			Spec: nwkv1.NetworkPolicySpec{
				PodSelector: metav1.LabelSelector{MatchLabels: map[string]string{"name": "npc-smoke"}},
				Ingress: []nwkv1.NetworkPolicyIngressRule{{
					Ports: []nwkv1.NetworkPolicyPort{{
						Protocol: ptr.To(corev1.ProtocolTCP),
						Port: &intstr.IntOrString{Type: intstr.Int, IntVal: 1234},
					}},
					From: []nwkv1.NetworkPolicyPeer{{
						PodSelector: &metav1.LabelSelector{MatchLabels: map[string]string{"name": "npc-smoke"}},
					}},
				}},
			},
		}
		if _, err := clientSet.NetworkingV1().NetworkPolicies("default").Create(context.Background(), nwp, metav1.CreateOptions{}); err != nil {
			t.Fatal(err)
		}
		// Check if policy is in effect
		ct.TestPodConnectivityEventual(t, 0, 1, 1235, connectivity.ExpectedReject, 30*time.Second)
		ct.TestPodConnectivity(t, 0, 1, 1234, connectivity.ExpectedSuccess)
	})
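	// The StatefulSet cases below run once per runtime class. The pod created by
	// makeTestStatefulSet is expected to run a test workload that reports its
	// progress through marker lines in its log ([INIT-PASSED], [INIT-FAILED],
	// [RESIZE-PASSED]), which the assertions below poll for.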
	for _, runtimeClass := range []string{"runc", "gvisor"} {
		statefulSetName := fmt.Sprintf("test-statefulset-%s", runtimeClass)
		util.TestEventual(t, fmt.Sprintf("StatefulSet with %s tests", runtimeClass), ctx, smallTestTimeout, func(ctx context.Context) error {
			_, err := clientSet.AppsV1().StatefulSets("default").Create(ctx, makeTestStatefulSet(statefulSetName, runtimeClass), metav1.CreateOptions{})
			return err
		})
		var podName string
		util.TestEventual(t, fmt.Sprintf("StatefulSet with %s tests successful", runtimeClass), ctx, smallTestTimeout, func(ctx context.Context) error {
			res, err := clientSet.CoreV1().Pods("default").List(ctx, metav1.ListOptions{LabelSelector: fmt.Sprintf("name=%s", statefulSetName)})
			if err != nil {
				return err
			}
			if len(res.Items) == 0 {
				return errors.New("pod didn't get created")
			}
			pod := res.Items[0]
			podName = pod.Name
			lines, err := getPodLogLines(ctx, clientSet, podName, 50)
			if err != nil {
				return fmt.Errorf("could not get logs: %w", err)
			}
			if slices.Contains(lines, "[INIT-PASSED]") {
				return nil
			}
			if slices.Contains(lines, "[INIT-FAILED]") {
				return util.Permanent(fmt.Errorf("tests failed, log:\n %s", strings.Join(lines, "\n ")))
			}
			return fmt.Errorf("pod is not ready: %v, log:\n %s", pod.Status.Phase, strings.Join(lines, "\n "))
		})
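		// Grow each of the StatefulSet's PersistentVolumeClaims to 4Mi, then wait
		// for the in-pod workload to observe the expanded volumes and report
		// [RESIZE-PASSED].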
		util.TestEventual(t, fmt.Sprintf("StatefulSet with %s request resize", runtimeClass), ctx, smallTestTimeout, func(ctx context.Context) error {
			for _, templateName := range []string{"vol-default", "vol-readonly", "vol-block"} {
				name := fmt.Sprintf("%s-%s-0", templateName, statefulSetName)
				patch := `{"spec": {"resources": {"requests": {"storage": "4Mi"}}}}`
				_, err := clientSet.CoreV1().PersistentVolumeClaims("default").Patch(ctx, name, types.StrategicMergePatchType, []byte(patch), metav1.PatchOptions{})
				if err != nil {
					return err
				}
			}
			return nil
		})
		i := 0
		util.TestEventual(t, fmt.Sprintf("StatefulSet with %s resize successful", runtimeClass), ctx, smallTestTimeout, func(ctx context.Context) error {
			// Make a change to the pod to make kubelet look at it and notice that it
			// should call NodeExpandVolume. If we don't do this, it might take up to
			// 1 minute for kubelet to notice, which slows down the test.
			patch := fmt.Sprintf(`{"metadata": {"labels": {"trigger-kubelet-update": "%d"}}}`, i)
			i += 1
			_, err := clientSet.CoreV1().Pods("default").Patch(ctx, podName, types.StrategicMergePatchType, []byte(patch), metav1.PatchOptions{})
			if err != nil {
				return err
			}

			lines, err := getPodLogLines(ctx, clientSet, podName, 50)
			if err != nil {
				return fmt.Errorf("could not get logs: %w", err)
			}
			if slices.Contains(lines, "[RESIZE-PASSED]") {
				return nil
			}
			return fmt.Errorf("waiting for resize, log:\n %s", strings.Join(lines, "\n "))
		})
	}
	util.TestEventual(t, "Deployment in user namespace", ctx, largeTestTimeout, func(ctx context.Context) error {
		deployment := makeTestDeploymentSpec("test-userns-1")
		deployment.Spec.Template.Spec.HostUsers = ptr.To(false)
		deployment.Spec.Template.Spec.Containers[0].ReadinessProbe.HTTPGet.Path = "/ready_userns"
		_, err := clientSet.AppsV1().Deployments("default").Create(ctx, deployment, metav1.CreateOptions{})
		return err
	})
	util.TestEventual(t, "Deployment in user namespace is running", ctx, largeTestTimeout, func(ctx context.Context) error {
		res, err := clientSet.CoreV1().Pods("default").List(ctx, metav1.ListOptions{LabelSelector: "name=test-userns-1"})
		if err != nil {
			return err
		}
		if len(res.Items) == 0 {
			return errors.New("pod didn't get created")
		}
		pod := res.Items[0]
		if podv1.IsPodAvailable(&pod, 1, metav1.NewTime(time.Now())) {
			return nil
		}
		events, err := clientSet.CoreV1().Events("default").List(ctx, metav1.ListOptions{FieldSelector: fmt.Sprintf("involvedObject.name=%s,involvedObject.namespace=default", pod.Name)})
		if err != nil || len(events.Items) == 0 {
			return fmt.Errorf("pod is not ready: %v", pod.Status.Phase)
		} else {
			return fmt.Errorf("pod is not ready: %v", events.Items[0].Message)
		}
	})
	util.TestEventual(t, "In-cluster self-test job", ctx, smallTestTimeout, func(ctx context.Context) error {
		_, err := clientSet.BatchV1().Jobs("default").Create(ctx, makeSelftestSpec("selftest"), metav1.CreateOptions{})
		return err
	})
	util.TestEventual(t, "In-cluster self-test job passed", ctx, smallTestTimeout, func(ctx context.Context) error {
		res, err := clientSet.BatchV1().Jobs("default").Get(ctx, "selftest", metav1.GetOptions{})
		if err != nil {
			return err
		}
		if res.Status.Failed > 0 {
			pods, err := clientSet.CoreV1().Pods("default").List(ctx, metav1.ListOptions{
				LabelSelector: "job-name=selftest",
			})
			if err != nil {
				return util.Permanent(fmt.Errorf("job failed but failed to find pod: %w", err))
			}
			if len(pods.Items) < 1 {
				return fmt.Errorf("job failed but pod does not exist")
			}
			lines, err := getPodLogLines(ctx, clientSet, pods.Items[0].Name, 1)
			if err != nil {
				return fmt.Errorf("job failed but could not get logs: %w", err)
			}
			if len(lines) > 0 {
				return util.Permanent(fmt.Errorf("job failed, last log line: %s", lines[0]))
			}
			return util.Permanent(fmt.Errorf("job failed, empty log"))
		}
		if res.Status.Succeeded > 0 {
			return nil
		}
		return fmt.Errorf("job still running")
	})
	util.TestEventual(t, "Start NodePort test setup", ctx, smallTestTimeout, func(ctx context.Context) error {
		_, err := clientSet.AppsV1().Deployments("default").Create(ctx, makeHTTPServerDeploymentSpec("nodeport-server"), metav1.CreateOptions{})
		if err != nil && !kerrors.IsAlreadyExists(err) {
			return err
		}
		_, err = clientSet.CoreV1().Services("default").Create(ctx, makeHTTPServerNodePortService("nodeport-server"), metav1.CreateOptions{})
		if err != nil && !kerrors.IsAlreadyExists(err) {
			return err
		}
		return nil
	})
	util.TestEventual(t, "NodePort accessible from all nodes", ctx, smallTestTimeout, func(ctx context.Context) error {
		nodes, err := clientSet.CoreV1().Nodes().List(ctx, metav1.ListOptions{})
		if err != nil {
			return err
		}
		// Use a new client for each attempt. Requests are dialed through the
		// cluster's SOCKS proxy, which makes the node-internal addresses reachable
		// from the test.
		hc := http.Client{
			Timeout: 2 * time.Second,
			Transport: &http.Transport{
				Dial: cluster.SOCKSDialer.Dial,
			},
		}
		for _, n := range nodes.Items {
			var addr string
			for _, a := range n.Status.Addresses {
				if a.Type == corev1.NodeInternalIP {
					addr = a.Address
				}
			}
			u := url.URL{Scheme: "http", Host: addr, Path: "/"}
			res, err := hc.Get(u.String())
			if err != nil {
				return fmt.Errorf("failed getting from node %q: %w", n.Name, err)
			}
			if res.StatusCode != http.StatusOK {
				return fmt.Errorf("getting from node %q: HTTP %d", n.Name, res.StatusCode)
			}
			t.Logf("Got response from %q", n.Name)
		}
		return nil
	})
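	// Scrape the containerd metrics endpoint exposed on the node's metrics port.
	// The client authenticates with the cluster owner certificate, trusts the
	// cluster CA, and dials the target by node ID through cluster.DialNode
	// rather than the standard resolver.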
	util.TestEventual(t, "containerd metrics retrieved", ctx, smallTestTimeout, func(ctx context.Context) error {
		pool := x509.NewCertPool()
		pool.AddCert(cluster.CACertificate)
		cl := http.Client{
			Transport: &http.Transport{
				TLSClientConfig: &tls.Config{
					Certificates: []tls.Certificate{cluster.Owner},
					RootCAs: pool,
				},
				DialContext: func(ctx context.Context, _, addr string) (net.Conn, error) {
					return cluster.DialNode(ctx, addr)
				},
			},
		}
		u := url.URL{
			Scheme: "https",
			Host: net.JoinHostPort(cluster.NodeIDs[1], common.MetricsPort.PortString()),
			Path: "/metrics/containerd",
		}
		res, err := cl.Get(u.String())
		if err != nil {
			return err
		}
		defer res.Body.Close()
		if res.StatusCode != 200 {
			return fmt.Errorf("status code %d", res.StatusCode)
		}

		body, err := io.ReadAll(res.Body)
		if err != nil {
			return err
		}
		needle := "containerd_build_info_total"
		if !strings.Contains(string(body), needle) {
			return util.Permanent(fmt.Errorf("could not find %q in returned response", needle))
		}
		return nil
	})
}