blob: 0acc473c8869d6b8a3bf943d8b1c1a21b4ffcb92 [file] [log] [blame]
// Copyright The Monogon Project Authors.
// SPDX-License-Identifier: Apache-2.0
3
Serge Bazanski99b02142024-04-17 16:33:28 +02004package kubernetes
5
6import (
7 "context"
8 "crypto/tls"
9 "crypto/x509"
10 "errors"
11 "fmt"
12 "io"
13 "net"
14 "net/http"
15 _ "net/http/pprof"
16 "net/url"
17 "os"
18 "strings"
19 "testing"
20 "time"
21
22 "github.com/bazelbuild/rules_go/go/runfiles"
Serge Bazanski1e399142024-10-22 10:58:15 +000023 "google.golang.org/protobuf/types/known/fieldmaskpb"
Serge Bazanski99b02142024-04-17 16:33:28 +020024 corev1 "k8s.io/api/core/v1"
25 kerrors "k8s.io/apimachinery/pkg/api/errors"
Serge Bazanski99b02142024-04-17 16:33:28 +020026 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
27 podv1 "k8s.io/kubernetes/pkg/api/v1/pod"
Lorenz Brun2ecccae2024-11-27 22:03:35 +010028 "k8s.io/utils/ptr"
Serge Bazanski99b02142024-04-17 16:33:28 +020029
Lorenz Brun732a8842024-08-26 23:25:37 +020030 common "source.monogon.dev/metropolis/node"
Serge Bazanski6d1ff362024-09-30 15:15:31 +000031 apb "source.monogon.dev/metropolis/proto/api"
Lorenz Brun732a8842024-08-26 23:25:37 +020032 cpb "source.monogon.dev/metropolis/proto/common"
Tim Windelschmidt9f21f532024-05-07 15:14:20 +020033 mlaunch "source.monogon.dev/metropolis/test/launch"
34 "source.monogon.dev/metropolis/test/localregistry"
Serge Bazanski99b02142024-04-17 16:33:28 +020035 "source.monogon.dev/metropolis/test/util"
Serge Bazanski99b02142024-04-17 16:33:28 +020036)
37
// xTestImagesManifestPath is filled in by bazel at link time with the
// canonical path of the test images manifest. The init function below
// rewrites it to the real on-disk path using the rules_go runfiles package.
var xTestImagesManifestPath string
44
45func init() {
46 var err error
47 for _, path := range []*string{
48 &xTestImagesManifestPath,
49 } {
50 *path, err = runfiles.Rlocation(*path)
51 if err != nil {
52 panic(err)
53 }
54 }
55}
56
const (
	// globalTestTimeout bounds the context shared by a whole test function.
	//
	// Bazel would eventually time out the test after 900s ("large") if, for
	// some reason, the context cancellation fails to abort it.
	globalTestTimeout = 10 * time.Minute

	// smallTestTimeout and largeTestTimeout bound individual end-to-end test
	// steps of different sizes.
	smallTestTimeout = time.Minute
	largeTestTimeout = 2 * time.Minute
)
68
Serge Bazanski6d1ff362024-09-30 15:15:31 +000069// TestE2EKubernetesLabels verifies that Kubernetes node labels are being updated
70// when the cluster state changes.
71func TestE2EKubernetesLabels(t *testing.T) {
72 ctx, cancel := context.WithTimeout(context.Background(), globalTestTimeout)
73 defer cancel()
74
75 clusterOptions := mlaunch.ClusterOptions{
76 NumNodes: 2,
77 InitialClusterConfiguration: &cpb.ClusterConfiguration{
Jan Schär39f4f5c2024-10-29 09:41:50 +010078 ClusterDomain: "cluster.test",
Serge Bazanski6d1ff362024-09-30 15:15:31 +000079 TpmMode: cpb.ClusterConfiguration_TPM_MODE_DISABLED,
80 StorageSecurityPolicy: cpb.ClusterConfiguration_STORAGE_SECURITY_POLICY_NEEDS_INSECURE,
Serge Bazanski78567602024-10-31 13:42:04 +000081 Kubernetes: &cpb.ClusterConfiguration_Kubernetes{
82 NodeLabelsToSynchronize: []*cpb.ClusterConfiguration_Kubernetes_NodeLabelsToSynchronize{
Serge Bazanskie99638e2024-09-30 17:06:44 +000083 {Regexp: `^test\.monogon\.dev/`},
84 },
85 },
Serge Bazanski6d1ff362024-09-30 15:15:31 +000086 },
87 }
88 cluster, err := mlaunch.LaunchCluster(ctx, clusterOptions)
89 if err != nil {
90 t.Fatalf("LaunchCluster failed: %v", err)
91 }
92 defer func() {
93 err := cluster.Close()
94 if err != nil {
95 t.Fatalf("cluster Close failed: %v", err)
96 }
97 }()
98
99 con, err := cluster.CuratorClient()
100 if err != nil {
101 t.Fatalf("Could not get curator client: %v", err)
102 }
103 mgmt := apb.NewManagementClient(con)
Lorenz Brun8f1254d2025-01-28 14:10:05 +0100104 clientSet, _, err := cluster.GetKubeClientSet()
Serge Bazanski6d1ff362024-09-30 15:15:31 +0000105 if err != nil {
106 t.Fatal(err)
107 }
108
109 getLabelsForNode := func(nid string) common.Labels {
110 node, err := clientSet.CoreV1().Nodes().Get(ctx, nid, metav1.GetOptions{})
111 if kerrors.IsNotFound(err) {
112 return nil
113 }
114 if err != nil {
115 t.Fatalf("Could not get node %s: %v", nid, err)
116 return nil
117 }
118 return common.Labels(node.Labels).Filter(func(k, v string) bool {
Serge Bazanskie99638e2024-09-30 17:06:44 +0000119 if strings.HasPrefix(k, "node-role.kubernetes.io/") {
120 return true
121 }
122 if strings.HasPrefix(k, "test.monogon.dev/") {
123 return true
124 }
125 return false
Serge Bazanski6d1ff362024-09-30 15:15:31 +0000126 })
127 }
128
129 // Nodes should have no labels at first.
130 for _, nid := range cluster.NodeIDs {
131 if labels := getLabelsForNode(nid); !labels.Equals(nil) {
132 t.Errorf("Node %s should have no labels, has %s", nid, labels)
133 }
134 }
135 // Nominate both nodes to be Kubernetes workers.
136 for _, nid := range cluster.NodeIDs {
Serge Bazanski6d1ff362024-09-30 15:15:31 +0000137 _, err := mgmt.UpdateNodeRoles(ctx, &apb.UpdateNodeRolesRequest{
138 Node: &apb.UpdateNodeRolesRequest_Id{
139 Id: nid,
140 },
Jan Schärd1a8b642024-12-03 17:40:41 +0100141 KubernetesWorker: ptr.To(true),
Serge Bazanski6d1ff362024-09-30 15:15:31 +0000142 })
143 if err != nil {
144 t.Fatalf("Could not make %s a KubernetesWorker: %v", nid, err)
145 }
146 }
147
Jan Schär36f03752024-11-19 17:41:05 +0100148 util.MustTestEventual(t, "Labels added", ctx, smallTestTimeout, func(ctx context.Context) error {
Serge Bazanski6d1ff362024-09-30 15:15:31 +0000149 // Nodes should have role labels now.
150 for _, nid := range cluster.NodeIDs {
151 want := common.Labels{
152 "node-role.kubernetes.io/KubernetesWorker": "",
153 }
154 if nid == cluster.NodeIDs[0] {
155 want["node-role.kubernetes.io/KubernetesController"] = ""
156 want["node-role.kubernetes.io/ConsensusMember"] = ""
157 }
158 if labels := getLabelsForNode(nid); !want.Equals(labels) {
159 return fmt.Errorf("node %s should have labels %s, has %s", nid, want, labels)
160 }
161 }
162 return nil
163 })
164
165 // Remove KubernetesWorker from first node again. It will stay in k8s (arguably,
166 // this is a bug) but its role label should be removed.
Serge Bazanski6d1ff362024-09-30 15:15:31 +0000167 _, err = mgmt.UpdateNodeRoles(ctx, &apb.UpdateNodeRolesRequest{
168 Node: &apb.UpdateNodeRolesRequest_Id{
169 Id: cluster.NodeIDs[0],
170 },
Jan Schärd1a8b642024-12-03 17:40:41 +0100171 KubernetesWorker: ptr.To(false),
Serge Bazanski6d1ff362024-09-30 15:15:31 +0000172 })
173 if err != nil {
174 t.Fatalf("Could not remove KubernetesWorker from %s: %v", cluster.NodeIDs[0], err)
175 }
176
Jan Schär36f03752024-11-19 17:41:05 +0100177 util.MustTestEventual(t, "Labels removed", ctx, smallTestTimeout, func(ctx context.Context) error {
Serge Bazanski6d1ff362024-09-30 15:15:31 +0000178 for _, nid := range cluster.NodeIDs {
179 want := make(common.Labels)
180 if nid == cluster.NodeIDs[0] {
181 want["node-role.kubernetes.io/KubernetesController"] = ""
182 want["node-role.kubernetes.io/ConsensusMember"] = ""
183 } else {
184 want["node-role.kubernetes.io/KubernetesWorker"] = ""
185 }
186 if labels := getLabelsForNode(nid); !want.Equals(labels) {
187 return fmt.Errorf("node %s should have labels %s, has %s", nid, want, labels)
188 }
189 }
190 return nil
191 })
Serge Bazanskie99638e2024-09-30 17:06:44 +0000192
193 // Add Metropolis node label, ensure it gets reflected on the Kubernetes node.
194 _, err = mgmt.UpdateNodeLabels(ctx, &apb.UpdateNodeLabelsRequest{
195 Node: &apb.UpdateNodeLabelsRequest_Id{
196 Id: cluster.NodeIDs[1],
197 },
198 Upsert: []*apb.UpdateNodeLabelsRequest_Pair{
199 {Key: "test.monogon.dev/foo", Value: "bar"},
200 },
201 })
202
Jan Schär36f03752024-11-19 17:41:05 +0100203 util.MustTestEventual(t, "Metropolis labels added", ctx, smallTestTimeout, func(ctx context.Context) error {
Serge Bazanskie99638e2024-09-30 17:06:44 +0000204 if err != nil {
205 t.Fatalf("Could not add label to node: %v", err)
206 }
207 want := common.Labels{
208 "node-role.kubernetes.io/KubernetesWorker": "",
209 "test.monogon.dev/foo": "bar",
210 }
211 if labels := getLabelsForNode(cluster.NodeIDs[1]); !want.Equals(labels) {
Serge Bazanski1e399142024-10-22 10:58:15 +0000212 return fmt.Errorf("node %s should have labels %s, has %s", cluster.NodeIDs[1], want, labels)
Serge Bazanskie99638e2024-09-30 17:06:44 +0000213 }
214 return nil
215 })
Serge Bazanski1e399142024-10-22 10:58:15 +0000216
217 // Reconfigure node label rules.
218 _, err = mgmt.ConfigureCluster(ctx, &apb.ConfigureClusterRequest{
219 BaseConfig: &cpb.ClusterConfiguration{
Serge Bazanski78567602024-10-31 13:42:04 +0000220 Kubernetes: &cpb.ClusterConfiguration_Kubernetes{
221 NodeLabelsToSynchronize: []*cpb.ClusterConfiguration_Kubernetes_NodeLabelsToSynchronize{
Serge Bazanski1e399142024-10-22 10:58:15 +0000222 {Regexp: `^test\.monogon\.dev/`},
223 },
224 },
225 },
226 NewConfig: &cpb.ClusterConfiguration{
Serge Bazanski78567602024-10-31 13:42:04 +0000227 Kubernetes: &cpb.ClusterConfiguration_Kubernetes{},
Serge Bazanski1e399142024-10-22 10:58:15 +0000228 },
229 UpdateMask: &fieldmaskpb.FieldMask{
Serge Bazanski78567602024-10-31 13:42:04 +0000230 Paths: []string{"kubernetes.node_labels_to_synchronize"},
Serge Bazanski1e399142024-10-22 10:58:15 +0000231 },
232 })
233 if err != nil {
234 t.Fatalf("Could not update cluster configuration: %v", err)
235 }
236
237 ci, err := mgmt.GetClusterInfo(ctx, &apb.GetClusterInfoRequest{})
238 if err != nil {
239 t.Fatalf("Could not get cluster info")
240 }
241 // See if the config changed.
Serge Bazanski78567602024-10-31 13:42:04 +0000242 if rules := ci.ClusterConfiguration.Kubernetes.NodeLabelsToSynchronize; len(rules) != 0 {
Serge Bazanski1e399142024-10-22 10:58:15 +0000243 t.Fatalf("Wanted 0 label rules in config after reconfiguration, have %d: %v", len(rules), rules)
244 }
245 // TODO: ensure new rules get applied, but that will require watching the cluster
246 // config for changes in the labelmaker.
Serge Bazanski6d1ff362024-09-30 15:15:31 +0000247}
248
Serge Bazanski99b02142024-04-17 16:33:28 +0200249// TestE2EKubernetes exercises the Kubernetes functionality of Metropolis.
250//
251// The tests are performed against an in-memory cluster.
252func TestE2EKubernetes(t *testing.T) {
253 // Set a global timeout to make sure this terminates
254 ctx, cancel := context.WithTimeout(context.Background(), globalTestTimeout)
255 defer cancel()
256
Tim Windelschmidt82e6af72024-07-23 00:05:42 +0000257 df, err := os.ReadFile(xTestImagesManifestPath)
Serge Bazanski99b02142024-04-17 16:33:28 +0200258 if err != nil {
259 t.Fatalf("Reading registry manifest failed: %v", err)
260 }
261 lr, err := localregistry.FromBazelManifest(df)
262 if err != nil {
263 t.Fatalf("Creating test image registry failed: %v", err)
264 }
265
266 // Launch cluster.
Tim Windelschmidt9f21f532024-05-07 15:14:20 +0200267 clusterOptions := mlaunch.ClusterOptions{
Serge Bazanski99b02142024-04-17 16:33:28 +0200268 NumNodes: 2,
269 LocalRegistry: lr,
Lorenz Brun732a8842024-08-26 23:25:37 +0200270 InitialClusterConfiguration: &cpb.ClusterConfiguration{
Jan Schär39f4f5c2024-10-29 09:41:50 +0100271 ClusterDomain: "cluster.test",
Lorenz Brun732a8842024-08-26 23:25:37 +0200272 TpmMode: cpb.ClusterConfiguration_TPM_MODE_DISABLED,
273 StorageSecurityPolicy: cpb.ClusterConfiguration_STORAGE_SECURITY_POLICY_NEEDS_INSECURE,
274 },
Serge Bazanski99b02142024-04-17 16:33:28 +0200275 }
Tim Windelschmidt9f21f532024-05-07 15:14:20 +0200276 cluster, err := mlaunch.LaunchCluster(ctx, clusterOptions)
Serge Bazanski99b02142024-04-17 16:33:28 +0200277 if err != nil {
278 t.Fatalf("LaunchCluster failed: %v", err)
279 }
280 defer func() {
281 err := cluster.Close()
282 if err != nil {
283 t.Fatalf("cluster Close failed: %v", err)
284 }
285 }()
286
Lorenz Brun8f1254d2025-01-28 14:10:05 +0100287 clientSet, _, err := cluster.GetKubeClientSet()
Serge Bazanski99b02142024-04-17 16:33:28 +0200288 if err != nil {
289 t.Fatal(err)
290 }
291 util.TestEventual(t, "Add KubernetesWorker roles", ctx, smallTestTimeout, func(ctx context.Context) error {
292 // Make everything but the first node into KubernetesWorkers.
293 for i := 1; i < clusterOptions.NumNodes; i++ {
294 err := cluster.MakeKubernetesWorker(ctx, cluster.NodeIDs[i])
295 if err != nil {
296 return util.Permanent(fmt.Errorf("MakeKubernetesWorker: %w", err))
297 }
298 }
299 return nil
300 })
301 util.TestEventual(t, "Node is registered and ready", ctx, largeTestTimeout, func(ctx context.Context) error {
302 nodes, err := clientSet.CoreV1().Nodes().List(ctx, metav1.ListOptions{})
303 if err != nil {
304 return err
305 }
306 if len(nodes.Items) < 1 {
307 return errors.New("node not yet registered")
308 }
309 node := nodes.Items[0]
310 for _, cond := range node.Status.Conditions {
311 if cond.Type != corev1.NodeReady {
312 continue
313 }
314 if cond.Status != corev1.ConditionTrue {
315 return fmt.Errorf("node not ready: %v", cond.Message)
316 }
317 }
318 return nil
319 })
320 util.TestEventual(t, "Simple deployment", ctx, largeTestTimeout, func(ctx context.Context) error {
321 _, err := clientSet.AppsV1().Deployments("default").Create(ctx, makeTestDeploymentSpec("test-deploy-1"), metav1.CreateOptions{})
322 return err
323 })
324 util.TestEventual(t, "Simple deployment is running", ctx, largeTestTimeout, func(ctx context.Context) error {
325 res, err := clientSet.CoreV1().Pods("default").List(ctx, metav1.ListOptions{LabelSelector: "name=test-deploy-1"})
326 if err != nil {
327 return err
328 }
329 if len(res.Items) == 0 {
330 return errors.New("pod didn't get created")
331 }
332 pod := res.Items[0]
333 if podv1.IsPodAvailable(&pod, 1, metav1.NewTime(time.Now())) {
334 return nil
335 }
336 events, err := clientSet.CoreV1().Events("default").List(ctx, metav1.ListOptions{FieldSelector: fmt.Sprintf("involvedObject.name=%s,involvedObject.namespace=default", pod.Name)})
337 if err != nil || len(events.Items) == 0 {
338 return fmt.Errorf("pod is not ready: %v", pod.Status.Phase)
339 } else {
340 return fmt.Errorf("pod is not ready: %v", events.Items[0].Message)
341 }
342 })
343 util.TestEventual(t, "Simple deployment with gvisor", ctx, largeTestTimeout, func(ctx context.Context) error {
344 deployment := makeTestDeploymentSpec("test-deploy-2")
345 gvisorStr := "gvisor"
346 deployment.Spec.Template.Spec.RuntimeClassName = &gvisorStr
347 _, err := clientSet.AppsV1().Deployments("default").Create(ctx, deployment, metav1.CreateOptions{})
348 return err
349 })
350 util.TestEventual(t, "Simple deployment is running on gvisor", ctx, largeTestTimeout, func(ctx context.Context) error {
351 res, err := clientSet.CoreV1().Pods("default").List(ctx, metav1.ListOptions{LabelSelector: "name=test-deploy-2"})
352 if err != nil {
353 return err
354 }
355 if len(res.Items) == 0 {
356 return errors.New("pod didn't get created")
357 }
358 pod := res.Items[0]
359 if podv1.IsPodAvailable(&pod, 1, metav1.NewTime(time.Now())) {
360 return nil
361 }
362 events, err := clientSet.CoreV1().Events("default").List(ctx, metav1.ListOptions{FieldSelector: fmt.Sprintf("involvedObject.name=%s,involvedObject.namespace=default", pod.Name)})
363 if err != nil || len(events.Items) == 0 {
364 return fmt.Errorf("pod is not ready: %v", pod.Status.Phase)
365 } else {
366 var errorMsg strings.Builder
367 for _, msg := range events.Items {
368 errorMsg.WriteString(" | ")
369 errorMsg.WriteString(msg.Message)
370 }
Tim Windelschmidt5f1a7de2024-09-19 02:00:14 +0200371 return fmt.Errorf("pod is not ready: %s", errorMsg.String())
Serge Bazanski99b02142024-04-17 16:33:28 +0200372 }
373 })
Jan Schär73beb692024-11-27 17:47:09 +0100374 for _, runtimeClass := range []string{"runc", "gvisor"} {
375 statefulSetName := fmt.Sprintf("test-statefulset-%s", runtimeClass)
376 util.TestEventual(t, fmt.Sprintf("StatefulSet with %s tests", runtimeClass), ctx, smallTestTimeout, func(ctx context.Context) error {
377 _, err := clientSet.AppsV1().StatefulSets("default").Create(ctx, makeTestStatefulSet(statefulSetName, runtimeClass), metav1.CreateOptions{})
Serge Bazanski99b02142024-04-17 16:33:28 +0200378 return err
Jan Schär73beb692024-11-27 17:47:09 +0100379 })
380 util.TestEventual(t, fmt.Sprintf("StatefulSet with %s tests successful", runtimeClass), ctx, smallTestTimeout, func(ctx context.Context) error {
381 res, err := clientSet.CoreV1().Pods("default").List(ctx, metav1.ListOptions{LabelSelector: fmt.Sprintf("name=%s", statefulSetName)})
382 if err != nil {
383 return err
Jan Schär652c2ad2024-11-19 17:40:50 +0100384 }
Jan Schär73beb692024-11-27 17:47:09 +0100385 if len(res.Items) == 0 {
386 return errors.New("pod didn't get created")
387 }
388 pod := res.Items[0]
389 lines, err := getPodLogLines(ctx, clientSet, pod.Name, 50)
390 if err != nil {
391 return fmt.Errorf("could not get logs: %w", err)
392 }
393 if len(lines) > 0 {
394 switch lines[len(lines)-1] {
395 case "[TESTS-PASSED]":
396 return nil
397 case "[TESTS-FAILED]":
398 return util.Permanent(fmt.Errorf("tests failed, log:\n %s", strings.Join(lines, "\n ")))
399 }
400 }
401 return fmt.Errorf("pod is not ready: %v, log:\n %s", pod.Status.Phase, strings.Join(lines, "\n "))
402 })
403 }
Lorenz Brun2ecccae2024-11-27 22:03:35 +0100404 util.TestEventual(t, "Deployment in user namespace", ctx, largeTestTimeout, func(ctx context.Context) error {
405 deployment := makeTestDeploymentSpec("test-userns-1")
406 deployment.Spec.Template.Spec.HostUsers = ptr.To(false)
407 deployment.Spec.Template.Spec.Containers[0].ReadinessProbe.HTTPGet.Path = "/ready_userns"
408 _, err := clientSet.AppsV1().Deployments("default").Create(ctx, deployment, metav1.CreateOptions{})
409 return err
410 })
411 util.TestEventual(t, "Deployment in user namespace is running", ctx, largeTestTimeout, func(ctx context.Context) error {
412 res, err := clientSet.CoreV1().Pods("default").List(ctx, metav1.ListOptions{LabelSelector: "name=test-userns-1"})
413 if err != nil {
414 return err
415 }
416 if len(res.Items) == 0 {
417 return errors.New("pod didn't get created")
418 }
419 pod := res.Items[0]
420 if podv1.IsPodAvailable(&pod, 1, metav1.NewTime(time.Now())) {
421 return nil
422 }
423 events, err := clientSet.CoreV1().Events("default").List(ctx, metav1.ListOptions{FieldSelector: fmt.Sprintf("involvedObject.name=%s,involvedObject.namespace=default", pod.Name)})
424 if err != nil || len(events.Items) == 0 {
425 return fmt.Errorf("pod is not ready: %v", pod.Status.Phase)
426 } else {
427 return fmt.Errorf("pod is not ready: %v", events.Items[0].Message)
428 }
429 })
Serge Bazanski99b02142024-04-17 16:33:28 +0200430 util.TestEventual(t, "In-cluster self-test job", ctx, smallTestTimeout, func(ctx context.Context) error {
431 _, err := clientSet.BatchV1().Jobs("default").Create(ctx, makeSelftestSpec("selftest"), metav1.CreateOptions{})
432 return err
433 })
434 util.TestEventual(t, "In-cluster self-test job passed", ctx, smallTestTimeout, func(ctx context.Context) error {
435 res, err := clientSet.BatchV1().Jobs("default").Get(ctx, "selftest", metav1.GetOptions{})
436 if err != nil {
437 return err
438 }
439 if res.Status.Failed > 0 {
440 pods, err := clientSet.CoreV1().Pods("default").List(ctx, metav1.ListOptions{
441 LabelSelector: "job-name=selftest",
442 })
443 if err != nil {
444 return util.Permanent(fmt.Errorf("job failed but failed to find pod: %w", err))
445 }
446 if len(pods.Items) < 1 {
447 return fmt.Errorf("job failed but pod does not exist")
448 }
449 lines, err := getPodLogLines(ctx, clientSet, pods.Items[0].Name, 1)
450 if err != nil {
451 return fmt.Errorf("job failed but could not get logs: %w", err)
452 }
453 if len(lines) > 0 {
454 return util.Permanent(fmt.Errorf("job failed, last log line: %s", lines[0]))
455 }
456 return util.Permanent(fmt.Errorf("job failed, empty log"))
457 }
458 if res.Status.Succeeded > 0 {
459 return nil
460 }
461 return fmt.Errorf("job still running")
462 })
463 util.TestEventual(t, "Start NodePort test setup", ctx, smallTestTimeout, func(ctx context.Context) error {
464 _, err := clientSet.AppsV1().Deployments("default").Create(ctx, makeHTTPServerDeploymentSpec("nodeport-server"), metav1.CreateOptions{})
465 if err != nil && !kerrors.IsAlreadyExists(err) {
466 return err
467 }
468 _, err = clientSet.CoreV1().Services("default").Create(ctx, makeHTTPServerNodePortService("nodeport-server"), metav1.CreateOptions{})
469 if err != nil && !kerrors.IsAlreadyExists(err) {
470 return err
471 }
472 return nil
473 })
474 util.TestEventual(t, "NodePort accessible from all nodes", ctx, smallTestTimeout, func(ctx context.Context) error {
475 nodes, err := clientSet.CoreV1().Nodes().List(ctx, metav1.ListOptions{})
476 if err != nil {
477 return err
478 }
479 // Use a new client for each attempt
480 hc := http.Client{
481 Timeout: 2 * time.Second,
482 Transport: &http.Transport{
483 Dial: cluster.SOCKSDialer.Dial,
484 },
485 }
486 for _, n := range nodes.Items {
487 var addr string
488 for _, a := range n.Status.Addresses {
489 if a.Type == corev1.NodeInternalIP {
490 addr = a.Address
491 }
492 }
493 u := url.URL{Scheme: "http", Host: addr, Path: "/"}
494 res, err := hc.Get(u.String())
495 if err != nil {
496 return fmt.Errorf("failed getting from node %q: %w", n.Name, err)
497 }
498 if res.StatusCode != http.StatusOK {
499 return fmt.Errorf("getting from node %q: HTTP %d", n.Name, res.StatusCode)
500 }
501 t.Logf("Got response from %q", n.Name)
502 }
503 return nil
504 })
505 util.TestEventual(t, "containerd metrics retrieved", ctx, smallTestTimeout, func(ctx context.Context) error {
506 pool := x509.NewCertPool()
507 pool.AddCert(cluster.CACertificate)
508 cl := http.Client{
509 Transport: &http.Transport{
510 TLSClientConfig: &tls.Config{
511 Certificates: []tls.Certificate{cluster.Owner},
512 RootCAs: pool,
513 },
514 DialContext: func(ctx context.Context, _, addr string) (net.Conn, error) {
515 return cluster.DialNode(ctx, addr)
516 },
517 },
518 }
519 u := url.URL{
520 Scheme: "https",
521 Host: net.JoinHostPort(cluster.NodeIDs[1], common.MetricsPort.PortString()),
522 Path: "/metrics/containerd",
523 }
524 res, err := cl.Get(u.String())
525 if err != nil {
526 return err
527 }
528 defer res.Body.Close()
529 if res.StatusCode != 200 {
530 return fmt.Errorf("status code %d", res.StatusCode)
531 }
532
533 body, err := io.ReadAll(res.Body)
534 if err != nil {
535 return err
536 }
537 needle := "containerd_build_info_total"
538 if !strings.Contains(string(body), needle) {
539 return util.Permanent(fmt.Errorf("could not find %q in returned response", needle))
540 }
541 return nil
542 })
Serge Bazanski99b02142024-04-17 16:33:28 +0200543}