blob: 0c116f6ada36d59a9f833925bf40c80f92f45040 [file] [log] [blame]
Serge Bazanski99b02142024-04-17 16:33:28 +02001package kubernetes
2
3import (
4 "context"
5 "crypto/tls"
6 "crypto/x509"
7 "errors"
8 "fmt"
9 "io"
10 "net"
11 "net/http"
12 _ "net/http/pprof"
13 "net/url"
14 "os"
15 "strings"
16 "testing"
17 "time"
18
19 "github.com/bazelbuild/rules_go/go/runfiles"
Serge Bazanski1e399142024-10-22 10:58:15 +000020 "google.golang.org/protobuf/types/known/fieldmaskpb"
Serge Bazanski99b02142024-04-17 16:33:28 +020021 corev1 "k8s.io/api/core/v1"
22 kerrors "k8s.io/apimachinery/pkg/api/errors"
Serge Bazanski99b02142024-04-17 16:33:28 +020023 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
24 podv1 "k8s.io/kubernetes/pkg/api/v1/pod"
Lorenz Brun2ecccae2024-11-27 22:03:35 +010025 "k8s.io/utils/ptr"
Serge Bazanski99b02142024-04-17 16:33:28 +020026
Lorenz Brun732a8842024-08-26 23:25:37 +020027 common "source.monogon.dev/metropolis/node"
Serge Bazanski6d1ff362024-09-30 15:15:31 +000028 apb "source.monogon.dev/metropolis/proto/api"
Lorenz Brun732a8842024-08-26 23:25:37 +020029 cpb "source.monogon.dev/metropolis/proto/common"
Tim Windelschmidt9f21f532024-05-07 15:14:20 +020030 mlaunch "source.monogon.dev/metropolis/test/launch"
31 "source.monogon.dev/metropolis/test/localregistry"
Serge Bazanski99b02142024-04-17 16:33:28 +020032 "source.monogon.dev/metropolis/test/util"
Serge Bazanski99b02142024-04-17 16:33:28 +020033)
34
// xTestImagesManifestPath is filled in by bazel at link time with the
// canonical path of the test images manifest. The init function below
// resolves it to the real on-disk path via the rules_go runfiles package.
var xTestImagesManifestPath string
41
42func init() {
43 var err error
44 for _, path := range []*string{
45 &xTestImagesManifestPath,
46 } {
47 *path, err = runfiles.Rlocation(*path)
48 if err != nil {
49 panic(err)
50 }
51 }
52}
53
const (
	// globalTestTimeout bounds the context shared by a whole test function.
	//
	// Should context cancellation fail to abort the test for some reason,
	// Bazel would eventually time it out after 900s ("large").
	globalTestTimeout = 10 * time.Minute

	// Per-step timeouts for individual end-to-end checks of different sizes.
	smallTestTimeout = time.Minute
	largeTestTimeout = 2 * time.Minute
)
65
Serge Bazanski6d1ff362024-09-30 15:15:31 +000066// TestE2EKubernetesLabels verifies that Kubernetes node labels are being updated
67// when the cluster state changes.
68func TestE2EKubernetesLabels(t *testing.T) {
69 ctx, cancel := context.WithTimeout(context.Background(), globalTestTimeout)
70 defer cancel()
71
72 clusterOptions := mlaunch.ClusterOptions{
73 NumNodes: 2,
74 InitialClusterConfiguration: &cpb.ClusterConfiguration{
Jan Schär39f4f5c2024-10-29 09:41:50 +010075 ClusterDomain: "cluster.test",
Serge Bazanski6d1ff362024-09-30 15:15:31 +000076 TpmMode: cpb.ClusterConfiguration_TPM_MODE_DISABLED,
77 StorageSecurityPolicy: cpb.ClusterConfiguration_STORAGE_SECURITY_POLICY_NEEDS_INSECURE,
Serge Bazanski78567602024-10-31 13:42:04 +000078 Kubernetes: &cpb.ClusterConfiguration_Kubernetes{
79 NodeLabelsToSynchronize: []*cpb.ClusterConfiguration_Kubernetes_NodeLabelsToSynchronize{
Serge Bazanskie99638e2024-09-30 17:06:44 +000080 {Regexp: `^test\.monogon\.dev/`},
81 },
82 },
Serge Bazanski6d1ff362024-09-30 15:15:31 +000083 },
84 }
85 cluster, err := mlaunch.LaunchCluster(ctx, clusterOptions)
86 if err != nil {
87 t.Fatalf("LaunchCluster failed: %v", err)
88 }
89 defer func() {
90 err := cluster.Close()
91 if err != nil {
92 t.Fatalf("cluster Close failed: %v", err)
93 }
94 }()
95
96 con, err := cluster.CuratorClient()
97 if err != nil {
98 t.Fatalf("Could not get curator client: %v", err)
99 }
100 mgmt := apb.NewManagementClient(con)
Lorenz Brun8f1254d2025-01-28 14:10:05 +0100101 clientSet, _, err := cluster.GetKubeClientSet()
Serge Bazanski6d1ff362024-09-30 15:15:31 +0000102 if err != nil {
103 t.Fatal(err)
104 }
105
106 getLabelsForNode := func(nid string) common.Labels {
107 node, err := clientSet.CoreV1().Nodes().Get(ctx, nid, metav1.GetOptions{})
108 if kerrors.IsNotFound(err) {
109 return nil
110 }
111 if err != nil {
112 t.Fatalf("Could not get node %s: %v", nid, err)
113 return nil
114 }
115 return common.Labels(node.Labels).Filter(func(k, v string) bool {
Serge Bazanskie99638e2024-09-30 17:06:44 +0000116 if strings.HasPrefix(k, "node-role.kubernetes.io/") {
117 return true
118 }
119 if strings.HasPrefix(k, "test.monogon.dev/") {
120 return true
121 }
122 return false
Serge Bazanski6d1ff362024-09-30 15:15:31 +0000123 })
124 }
125
126 // Nodes should have no labels at first.
127 for _, nid := range cluster.NodeIDs {
128 if labels := getLabelsForNode(nid); !labels.Equals(nil) {
129 t.Errorf("Node %s should have no labels, has %s", nid, labels)
130 }
131 }
132 // Nominate both nodes to be Kubernetes workers.
133 for _, nid := range cluster.NodeIDs {
Serge Bazanski6d1ff362024-09-30 15:15:31 +0000134 _, err := mgmt.UpdateNodeRoles(ctx, &apb.UpdateNodeRolesRequest{
135 Node: &apb.UpdateNodeRolesRequest_Id{
136 Id: nid,
137 },
Jan Schärd1a8b642024-12-03 17:40:41 +0100138 KubernetesWorker: ptr.To(true),
Serge Bazanski6d1ff362024-09-30 15:15:31 +0000139 })
140 if err != nil {
141 t.Fatalf("Could not make %s a KubernetesWorker: %v", nid, err)
142 }
143 }
144
Jan Schär36f03752024-11-19 17:41:05 +0100145 util.MustTestEventual(t, "Labels added", ctx, smallTestTimeout, func(ctx context.Context) error {
Serge Bazanski6d1ff362024-09-30 15:15:31 +0000146 // Nodes should have role labels now.
147 for _, nid := range cluster.NodeIDs {
148 want := common.Labels{
149 "node-role.kubernetes.io/KubernetesWorker": "",
150 }
151 if nid == cluster.NodeIDs[0] {
152 want["node-role.kubernetes.io/KubernetesController"] = ""
153 want["node-role.kubernetes.io/ConsensusMember"] = ""
154 }
155 if labels := getLabelsForNode(nid); !want.Equals(labels) {
156 return fmt.Errorf("node %s should have labels %s, has %s", nid, want, labels)
157 }
158 }
159 return nil
160 })
161
162 // Remove KubernetesWorker from first node again. It will stay in k8s (arguably,
163 // this is a bug) but its role label should be removed.
Serge Bazanski6d1ff362024-09-30 15:15:31 +0000164 _, err = mgmt.UpdateNodeRoles(ctx, &apb.UpdateNodeRolesRequest{
165 Node: &apb.UpdateNodeRolesRequest_Id{
166 Id: cluster.NodeIDs[0],
167 },
Jan Schärd1a8b642024-12-03 17:40:41 +0100168 KubernetesWorker: ptr.To(false),
Serge Bazanski6d1ff362024-09-30 15:15:31 +0000169 })
170 if err != nil {
171 t.Fatalf("Could not remove KubernetesWorker from %s: %v", cluster.NodeIDs[0], err)
172 }
173
Jan Schär36f03752024-11-19 17:41:05 +0100174 util.MustTestEventual(t, "Labels removed", ctx, smallTestTimeout, func(ctx context.Context) error {
Serge Bazanski6d1ff362024-09-30 15:15:31 +0000175 for _, nid := range cluster.NodeIDs {
176 want := make(common.Labels)
177 if nid == cluster.NodeIDs[0] {
178 want["node-role.kubernetes.io/KubernetesController"] = ""
179 want["node-role.kubernetes.io/ConsensusMember"] = ""
180 } else {
181 want["node-role.kubernetes.io/KubernetesWorker"] = ""
182 }
183 if labels := getLabelsForNode(nid); !want.Equals(labels) {
184 return fmt.Errorf("node %s should have labels %s, has %s", nid, want, labels)
185 }
186 }
187 return nil
188 })
Serge Bazanskie99638e2024-09-30 17:06:44 +0000189
190 // Add Metropolis node label, ensure it gets reflected on the Kubernetes node.
191 _, err = mgmt.UpdateNodeLabels(ctx, &apb.UpdateNodeLabelsRequest{
192 Node: &apb.UpdateNodeLabelsRequest_Id{
193 Id: cluster.NodeIDs[1],
194 },
195 Upsert: []*apb.UpdateNodeLabelsRequest_Pair{
196 {Key: "test.monogon.dev/foo", Value: "bar"},
197 },
198 })
199
Jan Schär36f03752024-11-19 17:41:05 +0100200 util.MustTestEventual(t, "Metropolis labels added", ctx, smallTestTimeout, func(ctx context.Context) error {
Serge Bazanskie99638e2024-09-30 17:06:44 +0000201 if err != nil {
202 t.Fatalf("Could not add label to node: %v", err)
203 }
204 want := common.Labels{
205 "node-role.kubernetes.io/KubernetesWorker": "",
206 "test.monogon.dev/foo": "bar",
207 }
208 if labels := getLabelsForNode(cluster.NodeIDs[1]); !want.Equals(labels) {
Serge Bazanski1e399142024-10-22 10:58:15 +0000209 return fmt.Errorf("node %s should have labels %s, has %s", cluster.NodeIDs[1], want, labels)
Serge Bazanskie99638e2024-09-30 17:06:44 +0000210 }
211 return nil
212 })
Serge Bazanski1e399142024-10-22 10:58:15 +0000213
214 // Reconfigure node label rules.
215 _, err = mgmt.ConfigureCluster(ctx, &apb.ConfigureClusterRequest{
216 BaseConfig: &cpb.ClusterConfiguration{
Serge Bazanski78567602024-10-31 13:42:04 +0000217 Kubernetes: &cpb.ClusterConfiguration_Kubernetes{
218 NodeLabelsToSynchronize: []*cpb.ClusterConfiguration_Kubernetes_NodeLabelsToSynchronize{
Serge Bazanski1e399142024-10-22 10:58:15 +0000219 {Regexp: `^test\.monogon\.dev/`},
220 },
221 },
222 },
223 NewConfig: &cpb.ClusterConfiguration{
Serge Bazanski78567602024-10-31 13:42:04 +0000224 Kubernetes: &cpb.ClusterConfiguration_Kubernetes{},
Serge Bazanski1e399142024-10-22 10:58:15 +0000225 },
226 UpdateMask: &fieldmaskpb.FieldMask{
Serge Bazanski78567602024-10-31 13:42:04 +0000227 Paths: []string{"kubernetes.node_labels_to_synchronize"},
Serge Bazanski1e399142024-10-22 10:58:15 +0000228 },
229 })
230 if err != nil {
231 t.Fatalf("Could not update cluster configuration: %v", err)
232 }
233
234 ci, err := mgmt.GetClusterInfo(ctx, &apb.GetClusterInfoRequest{})
235 if err != nil {
236 t.Fatalf("Could not get cluster info")
237 }
238 // See if the config changed.
Serge Bazanski78567602024-10-31 13:42:04 +0000239 if rules := ci.ClusterConfiguration.Kubernetes.NodeLabelsToSynchronize; len(rules) != 0 {
Serge Bazanski1e399142024-10-22 10:58:15 +0000240 t.Fatalf("Wanted 0 label rules in config after reconfiguration, have %d: %v", len(rules), rules)
241 }
242 // TODO: ensure new rules get applied, but that will require watching the cluster
243 // config for changes in the labelmaker.
Serge Bazanski6d1ff362024-09-30 15:15:31 +0000244}
245
Serge Bazanski99b02142024-04-17 16:33:28 +0200246// TestE2EKubernetes exercises the Kubernetes functionality of Metropolis.
247//
248// The tests are performed against an in-memory cluster.
249func TestE2EKubernetes(t *testing.T) {
250 // Set a global timeout to make sure this terminates
251 ctx, cancel := context.WithTimeout(context.Background(), globalTestTimeout)
252 defer cancel()
253
Tim Windelschmidt82e6af72024-07-23 00:05:42 +0000254 df, err := os.ReadFile(xTestImagesManifestPath)
Serge Bazanski99b02142024-04-17 16:33:28 +0200255 if err != nil {
256 t.Fatalf("Reading registry manifest failed: %v", err)
257 }
258 lr, err := localregistry.FromBazelManifest(df)
259 if err != nil {
260 t.Fatalf("Creating test image registry failed: %v", err)
261 }
262
263 // Launch cluster.
Tim Windelschmidt9f21f532024-05-07 15:14:20 +0200264 clusterOptions := mlaunch.ClusterOptions{
Serge Bazanski99b02142024-04-17 16:33:28 +0200265 NumNodes: 2,
266 LocalRegistry: lr,
Lorenz Brun732a8842024-08-26 23:25:37 +0200267 InitialClusterConfiguration: &cpb.ClusterConfiguration{
Jan Schär39f4f5c2024-10-29 09:41:50 +0100268 ClusterDomain: "cluster.test",
Lorenz Brun732a8842024-08-26 23:25:37 +0200269 TpmMode: cpb.ClusterConfiguration_TPM_MODE_DISABLED,
270 StorageSecurityPolicy: cpb.ClusterConfiguration_STORAGE_SECURITY_POLICY_NEEDS_INSECURE,
271 },
Serge Bazanski99b02142024-04-17 16:33:28 +0200272 }
Tim Windelschmidt9f21f532024-05-07 15:14:20 +0200273 cluster, err := mlaunch.LaunchCluster(ctx, clusterOptions)
Serge Bazanski99b02142024-04-17 16:33:28 +0200274 if err != nil {
275 t.Fatalf("LaunchCluster failed: %v", err)
276 }
277 defer func() {
278 err := cluster.Close()
279 if err != nil {
280 t.Fatalf("cluster Close failed: %v", err)
281 }
282 }()
283
Lorenz Brun8f1254d2025-01-28 14:10:05 +0100284 clientSet, _, err := cluster.GetKubeClientSet()
Serge Bazanski99b02142024-04-17 16:33:28 +0200285 if err != nil {
286 t.Fatal(err)
287 }
288 util.TestEventual(t, "Add KubernetesWorker roles", ctx, smallTestTimeout, func(ctx context.Context) error {
289 // Make everything but the first node into KubernetesWorkers.
290 for i := 1; i < clusterOptions.NumNodes; i++ {
291 err := cluster.MakeKubernetesWorker(ctx, cluster.NodeIDs[i])
292 if err != nil {
293 return util.Permanent(fmt.Errorf("MakeKubernetesWorker: %w", err))
294 }
295 }
296 return nil
297 })
298 util.TestEventual(t, "Node is registered and ready", ctx, largeTestTimeout, func(ctx context.Context) error {
299 nodes, err := clientSet.CoreV1().Nodes().List(ctx, metav1.ListOptions{})
300 if err != nil {
301 return err
302 }
303 if len(nodes.Items) < 1 {
304 return errors.New("node not yet registered")
305 }
306 node := nodes.Items[0]
307 for _, cond := range node.Status.Conditions {
308 if cond.Type != corev1.NodeReady {
309 continue
310 }
311 if cond.Status != corev1.ConditionTrue {
312 return fmt.Errorf("node not ready: %v", cond.Message)
313 }
314 }
315 return nil
316 })
317 util.TestEventual(t, "Simple deployment", ctx, largeTestTimeout, func(ctx context.Context) error {
318 _, err := clientSet.AppsV1().Deployments("default").Create(ctx, makeTestDeploymentSpec("test-deploy-1"), metav1.CreateOptions{})
319 return err
320 })
321 util.TestEventual(t, "Simple deployment is running", ctx, largeTestTimeout, func(ctx context.Context) error {
322 res, err := clientSet.CoreV1().Pods("default").List(ctx, metav1.ListOptions{LabelSelector: "name=test-deploy-1"})
323 if err != nil {
324 return err
325 }
326 if len(res.Items) == 0 {
327 return errors.New("pod didn't get created")
328 }
329 pod := res.Items[0]
330 if podv1.IsPodAvailable(&pod, 1, metav1.NewTime(time.Now())) {
331 return nil
332 }
333 events, err := clientSet.CoreV1().Events("default").List(ctx, metav1.ListOptions{FieldSelector: fmt.Sprintf("involvedObject.name=%s,involvedObject.namespace=default", pod.Name)})
334 if err != nil || len(events.Items) == 0 {
335 return fmt.Errorf("pod is not ready: %v", pod.Status.Phase)
336 } else {
337 return fmt.Errorf("pod is not ready: %v", events.Items[0].Message)
338 }
339 })
340 util.TestEventual(t, "Simple deployment with gvisor", ctx, largeTestTimeout, func(ctx context.Context) error {
341 deployment := makeTestDeploymentSpec("test-deploy-2")
342 gvisorStr := "gvisor"
343 deployment.Spec.Template.Spec.RuntimeClassName = &gvisorStr
344 _, err := clientSet.AppsV1().Deployments("default").Create(ctx, deployment, metav1.CreateOptions{})
345 return err
346 })
347 util.TestEventual(t, "Simple deployment is running on gvisor", ctx, largeTestTimeout, func(ctx context.Context) error {
348 res, err := clientSet.CoreV1().Pods("default").List(ctx, metav1.ListOptions{LabelSelector: "name=test-deploy-2"})
349 if err != nil {
350 return err
351 }
352 if len(res.Items) == 0 {
353 return errors.New("pod didn't get created")
354 }
355 pod := res.Items[0]
356 if podv1.IsPodAvailable(&pod, 1, metav1.NewTime(time.Now())) {
357 return nil
358 }
359 events, err := clientSet.CoreV1().Events("default").List(ctx, metav1.ListOptions{FieldSelector: fmt.Sprintf("involvedObject.name=%s,involvedObject.namespace=default", pod.Name)})
360 if err != nil || len(events.Items) == 0 {
361 return fmt.Errorf("pod is not ready: %v", pod.Status.Phase)
362 } else {
363 var errorMsg strings.Builder
364 for _, msg := range events.Items {
365 errorMsg.WriteString(" | ")
366 errorMsg.WriteString(msg.Message)
367 }
Tim Windelschmidt5f1a7de2024-09-19 02:00:14 +0200368 return fmt.Errorf("pod is not ready: %s", errorMsg.String())
Serge Bazanski99b02142024-04-17 16:33:28 +0200369 }
370 })
Jan Schär73beb692024-11-27 17:47:09 +0100371 for _, runtimeClass := range []string{"runc", "gvisor"} {
372 statefulSetName := fmt.Sprintf("test-statefulset-%s", runtimeClass)
373 util.TestEventual(t, fmt.Sprintf("StatefulSet with %s tests", runtimeClass), ctx, smallTestTimeout, func(ctx context.Context) error {
374 _, err := clientSet.AppsV1().StatefulSets("default").Create(ctx, makeTestStatefulSet(statefulSetName, runtimeClass), metav1.CreateOptions{})
Serge Bazanski99b02142024-04-17 16:33:28 +0200375 return err
Jan Schär73beb692024-11-27 17:47:09 +0100376 })
377 util.TestEventual(t, fmt.Sprintf("StatefulSet with %s tests successful", runtimeClass), ctx, smallTestTimeout, func(ctx context.Context) error {
378 res, err := clientSet.CoreV1().Pods("default").List(ctx, metav1.ListOptions{LabelSelector: fmt.Sprintf("name=%s", statefulSetName)})
379 if err != nil {
380 return err
Jan Schär652c2ad2024-11-19 17:40:50 +0100381 }
Jan Schär73beb692024-11-27 17:47:09 +0100382 if len(res.Items) == 0 {
383 return errors.New("pod didn't get created")
384 }
385 pod := res.Items[0]
386 lines, err := getPodLogLines(ctx, clientSet, pod.Name, 50)
387 if err != nil {
388 return fmt.Errorf("could not get logs: %w", err)
389 }
390 if len(lines) > 0 {
391 switch lines[len(lines)-1] {
392 case "[TESTS-PASSED]":
393 return nil
394 case "[TESTS-FAILED]":
395 return util.Permanent(fmt.Errorf("tests failed, log:\n %s", strings.Join(lines, "\n ")))
396 }
397 }
398 return fmt.Errorf("pod is not ready: %v, log:\n %s", pod.Status.Phase, strings.Join(lines, "\n "))
399 })
400 }
Lorenz Brun2ecccae2024-11-27 22:03:35 +0100401 util.TestEventual(t, "Deployment in user namespace", ctx, largeTestTimeout, func(ctx context.Context) error {
402 deployment := makeTestDeploymentSpec("test-userns-1")
403 deployment.Spec.Template.Spec.HostUsers = ptr.To(false)
404 deployment.Spec.Template.Spec.Containers[0].ReadinessProbe.HTTPGet.Path = "/ready_userns"
405 _, err := clientSet.AppsV1().Deployments("default").Create(ctx, deployment, metav1.CreateOptions{})
406 return err
407 })
408 util.TestEventual(t, "Deployment in user namespace is running", ctx, largeTestTimeout, func(ctx context.Context) error {
409 res, err := clientSet.CoreV1().Pods("default").List(ctx, metav1.ListOptions{LabelSelector: "name=test-userns-1"})
410 if err != nil {
411 return err
412 }
413 if len(res.Items) == 0 {
414 return errors.New("pod didn't get created")
415 }
416 pod := res.Items[0]
417 if podv1.IsPodAvailable(&pod, 1, metav1.NewTime(time.Now())) {
418 return nil
419 }
420 events, err := clientSet.CoreV1().Events("default").List(ctx, metav1.ListOptions{FieldSelector: fmt.Sprintf("involvedObject.name=%s,involvedObject.namespace=default", pod.Name)})
421 if err != nil || len(events.Items) == 0 {
422 return fmt.Errorf("pod is not ready: %v", pod.Status.Phase)
423 } else {
424 return fmt.Errorf("pod is not ready: %v", events.Items[0].Message)
425 }
426 })
Serge Bazanski99b02142024-04-17 16:33:28 +0200427 util.TestEventual(t, "In-cluster self-test job", ctx, smallTestTimeout, func(ctx context.Context) error {
428 _, err := clientSet.BatchV1().Jobs("default").Create(ctx, makeSelftestSpec("selftest"), metav1.CreateOptions{})
429 return err
430 })
431 util.TestEventual(t, "In-cluster self-test job passed", ctx, smallTestTimeout, func(ctx context.Context) error {
432 res, err := clientSet.BatchV1().Jobs("default").Get(ctx, "selftest", metav1.GetOptions{})
433 if err != nil {
434 return err
435 }
436 if res.Status.Failed > 0 {
437 pods, err := clientSet.CoreV1().Pods("default").List(ctx, metav1.ListOptions{
438 LabelSelector: "job-name=selftest",
439 })
440 if err != nil {
441 return util.Permanent(fmt.Errorf("job failed but failed to find pod: %w", err))
442 }
443 if len(pods.Items) < 1 {
444 return fmt.Errorf("job failed but pod does not exist")
445 }
446 lines, err := getPodLogLines(ctx, clientSet, pods.Items[0].Name, 1)
447 if err != nil {
448 return fmt.Errorf("job failed but could not get logs: %w", err)
449 }
450 if len(lines) > 0 {
451 return util.Permanent(fmt.Errorf("job failed, last log line: %s", lines[0]))
452 }
453 return util.Permanent(fmt.Errorf("job failed, empty log"))
454 }
455 if res.Status.Succeeded > 0 {
456 return nil
457 }
458 return fmt.Errorf("job still running")
459 })
460 util.TestEventual(t, "Start NodePort test setup", ctx, smallTestTimeout, func(ctx context.Context) error {
461 _, err := clientSet.AppsV1().Deployments("default").Create(ctx, makeHTTPServerDeploymentSpec("nodeport-server"), metav1.CreateOptions{})
462 if err != nil && !kerrors.IsAlreadyExists(err) {
463 return err
464 }
465 _, err = clientSet.CoreV1().Services("default").Create(ctx, makeHTTPServerNodePortService("nodeport-server"), metav1.CreateOptions{})
466 if err != nil && !kerrors.IsAlreadyExists(err) {
467 return err
468 }
469 return nil
470 })
471 util.TestEventual(t, "NodePort accessible from all nodes", ctx, smallTestTimeout, func(ctx context.Context) error {
472 nodes, err := clientSet.CoreV1().Nodes().List(ctx, metav1.ListOptions{})
473 if err != nil {
474 return err
475 }
476 // Use a new client for each attempt
477 hc := http.Client{
478 Timeout: 2 * time.Second,
479 Transport: &http.Transport{
480 Dial: cluster.SOCKSDialer.Dial,
481 },
482 }
483 for _, n := range nodes.Items {
484 var addr string
485 for _, a := range n.Status.Addresses {
486 if a.Type == corev1.NodeInternalIP {
487 addr = a.Address
488 }
489 }
490 u := url.URL{Scheme: "http", Host: addr, Path: "/"}
491 res, err := hc.Get(u.String())
492 if err != nil {
493 return fmt.Errorf("failed getting from node %q: %w", n.Name, err)
494 }
495 if res.StatusCode != http.StatusOK {
496 return fmt.Errorf("getting from node %q: HTTP %d", n.Name, res.StatusCode)
497 }
498 t.Logf("Got response from %q", n.Name)
499 }
500 return nil
501 })
502 util.TestEventual(t, "containerd metrics retrieved", ctx, smallTestTimeout, func(ctx context.Context) error {
503 pool := x509.NewCertPool()
504 pool.AddCert(cluster.CACertificate)
505 cl := http.Client{
506 Transport: &http.Transport{
507 TLSClientConfig: &tls.Config{
508 Certificates: []tls.Certificate{cluster.Owner},
509 RootCAs: pool,
510 },
511 DialContext: func(ctx context.Context, _, addr string) (net.Conn, error) {
512 return cluster.DialNode(ctx, addr)
513 },
514 },
515 }
516 u := url.URL{
517 Scheme: "https",
518 Host: net.JoinHostPort(cluster.NodeIDs[1], common.MetricsPort.PortString()),
519 Path: "/metrics/containerd",
520 }
521 res, err := cl.Get(u.String())
522 if err != nil {
523 return err
524 }
525 defer res.Body.Close()
526 if res.StatusCode != 200 {
527 return fmt.Errorf("status code %d", res.StatusCode)
528 }
529
530 body, err := io.ReadAll(res.Body)
531 if err != nil {
532 return err
533 }
534 needle := "containerd_build_info_total"
535 if !strings.Contains(string(body), needle) {
536 return util.Permanent(fmt.Errorf("could not find %q in returned response", needle))
537 }
538 return nil
539 })
Serge Bazanski99b02142024-04-17 16:33:28 +0200540}