package kubernetes

import (
	"context"
	"crypto/tls"
	"crypto/x509"
	"errors"
	"fmt"
	"io"
	"net"
	"net/http"
	_ "net/http/pprof"
	"net/url"
	"os"
	"strings"
	"testing"
	"time"

	"github.com/bazelbuild/rules_go/go/runfiles"
	"google.golang.org/protobuf/types/known/fieldmaskpb"
	corev1 "k8s.io/api/core/v1"
	kerrors "k8s.io/apimachinery/pkg/api/errors"
	"k8s.io/apimachinery/pkg/api/resource"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	podv1 "k8s.io/kubernetes/pkg/api/v1/pod"

	common "source.monogon.dev/metropolis/node"
	apb "source.monogon.dev/metropolis/proto/api"
	cpb "source.monogon.dev/metropolis/proto/common"
	mlaunch "source.monogon.dev/metropolis/test/launch"
	"source.monogon.dev/metropolis/test/localregistry"
	"source.monogon.dev/metropolis/test/util"
)

var (
	// These are filled by Bazel at link time with the canonical path of their
	// corresponding file. In the init function below we resolve them to real
	// paths using the rules_go runfiles package.
	xTestImagesManifestPath string
)
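// A minimal sketch of how xTestImagesManifestPath is assumed to be filled in,
// using rules_go's x_defs with runfiles-path expansion (the target label here
// is hypothetical, not necessarily the one used by this test's BUILD file):
//
//	x_defs = {
//	    "xTestImagesManifestPath": "$(rlocationpath //metropolis/test/e2e:testimages_manifest)",
//	},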

func init() {
	var err error
	for _, path := range []*string{
		&xTestImagesManifestPath,
	} {
		*path, err = runfiles.Rlocation(*path)
		if err != nil {
			panic(err)
		}
	}
}

const (
	// Timeout for the global test context.
	//
	// Bazel would eventually time out the test after 900s ("large") if, for
	// some reason, the context cancellation fails to abort it.
	globalTestTimeout = 600 * time.Second

	// Timeouts for individual end-to-end tests of different sizes.
	smallTestTimeout = 60 * time.Second
	largeTestTimeout = 120 * time.Second
)

// TestE2EKubernetesLabels verifies that Kubernetes node labels are updated
// when the cluster state changes.
func TestE2EKubernetesLabels(t *testing.T) {
	ctx, cancel := context.WithTimeout(context.Background(), globalTestTimeout)
	defer cancel()

	clusterOptions := mlaunch.ClusterOptions{
		NumNodes: 2,
		InitialClusterConfiguration: &cpb.ClusterConfiguration{
			ClusterDomain:         "cluster.test",
			TpmMode:               cpb.ClusterConfiguration_TPM_MODE_DISABLED,
			StorageSecurityPolicy: cpb.ClusterConfiguration_STORAGE_SECURITY_POLICY_NEEDS_INSECURE,
			Kubernetes: &cpb.ClusterConfiguration_Kubernetes{
				NodeLabelsToSynchronize: []*cpb.ClusterConfiguration_Kubernetes_NodeLabelsToSynchronize{
					{Regexp: `^test\.monogon\.dev/`},
				},
			},
		},
	}
	cluster, err := mlaunch.LaunchCluster(ctx, clusterOptions)
	if err != nil {
		t.Fatalf("LaunchCluster failed: %v", err)
	}
	defer func() {
		err := cluster.Close()
		if err != nil {
			t.Fatalf("cluster Close failed: %v", err)
		}
	}()

	con, err := cluster.CuratorClient()
	if err != nil {
		t.Fatalf("Could not get curator client: %v", err)
	}
	mgmt := apb.NewManagementClient(con)
	clientSet, err := cluster.GetKubeClientSet()
	if err != nil {
		t.Fatal(err)
	}

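	// getLabelsForNode returns the labels currently set on the given Kubernetes
	// node, filtered down to the node-role and test prefixes this test asserts
	// on. It returns nil if the node does not exist (yet).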
	getLabelsForNode := func(nid string) common.Labels {
		node, err := clientSet.CoreV1().Nodes().Get(ctx, nid, metav1.GetOptions{})
		if kerrors.IsNotFound(err) {
			return nil
		}
		if err != nil {
			t.Fatalf("Could not get node %s: %v", nid, err)
			return nil
		}
		return common.Labels(node.Labels).Filter(func(k, v string) bool {
			if strings.HasPrefix(k, "node-role.kubernetes.io/") {
				return true
			}
			if strings.HasPrefix(k, "test.monogon.dev/") {
				return true
			}
			return false
		})
	}

	// Nodes should have no labels at first.
	for _, nid := range cluster.NodeIDs {
		if labels := getLabelsForNode(nid); !labels.Equals(nil) {
			t.Errorf("Node %s should have no labels, has %s", nid, labels)
		}
	}
	// Nominate both nodes to be Kubernetes workers.
	for _, nid := range cluster.NodeIDs {
		yes := true
		_, err := mgmt.UpdateNodeRoles(ctx, &apb.UpdateNodeRolesRequest{
			Node: &apb.UpdateNodeRolesRequest_Id{
				Id: nid,
			},
			KubernetesWorker: &yes,
		})
		if err != nil {
			t.Fatalf("Could not make %s a KubernetesWorker: %v", nid, err)
		}
	}

	util.MustTestEventual(t, "Labels added", ctx, smallTestTimeout, func(ctx context.Context) error {
		// Nodes should have role labels now.
		for _, nid := range cluster.NodeIDs {
			want := common.Labels{
				"node-role.kubernetes.io/KubernetesWorker": "",
			}
			if nid == cluster.NodeIDs[0] {
				want["node-role.kubernetes.io/KubernetesController"] = ""
				want["node-role.kubernetes.io/ConsensusMember"] = ""
			}
			if labels := getLabelsForNode(nid); !want.Equals(labels) {
				return fmt.Errorf("node %s should have labels %s, has %s", nid, want, labels)
			}
		}
		return nil
	})

	// Remove KubernetesWorker from the first node again. It will stay in k8s
	// (arguably, this is a bug) but its role label should be removed.
	no := false
	_, err = mgmt.UpdateNodeRoles(ctx, &apb.UpdateNodeRolesRequest{
		Node: &apb.UpdateNodeRolesRequest_Id{
			Id: cluster.NodeIDs[0],
		},
		KubernetesWorker: &no,
	})
	if err != nil {
		t.Fatalf("Could not remove KubernetesWorker from %s: %v", cluster.NodeIDs[0], err)
	}

	util.MustTestEventual(t, "Labels removed", ctx, smallTestTimeout, func(ctx context.Context) error {
		for _, nid := range cluster.NodeIDs {
			want := make(common.Labels)
			if nid == cluster.NodeIDs[0] {
				want["node-role.kubernetes.io/KubernetesController"] = ""
				want["node-role.kubernetes.io/ConsensusMember"] = ""
			} else {
				want["node-role.kubernetes.io/KubernetesWorker"] = ""
			}
			if labels := getLabelsForNode(nid); !want.Equals(labels) {
				return fmt.Errorf("node %s should have labels %s, has %s", nid, want, labels)
			}
		}
		return nil
	})

	// Add a Metropolis node label and ensure it gets reflected on the
	// Kubernetes node.
	_, err = mgmt.UpdateNodeLabels(ctx, &apb.UpdateNodeLabelsRequest{
		Node: &apb.UpdateNodeLabelsRequest_Id{
			Id: cluster.NodeIDs[1],
		},
		Upsert: []*apb.UpdateNodeLabelsRequest_Pair{
			{Key: "test.monogon.dev/foo", Value: "bar"},
		},
	})
	if err != nil {
		t.Fatalf("Could not add label to node: %v", err)
	}

	util.MustTestEventual(t, "Metropolis labels added", ctx, smallTestTimeout, func(ctx context.Context) error {
		want := common.Labels{
			"node-role.kubernetes.io/KubernetesWorker": "",
			"test.monogon.dev/foo":                     "bar",
		}
		if labels := getLabelsForNode(cluster.NodeIDs[1]); !want.Equals(labels) {
			return fmt.Errorf("node %s should have labels %s, has %s", cluster.NodeIDs[1], want, labels)
		}
		return nil
	})

	// Reconfigure node label rules.
	_, err = mgmt.ConfigureCluster(ctx, &apb.ConfigureClusterRequest{
		BaseConfig: &cpb.ClusterConfiguration{
			Kubernetes: &cpb.ClusterConfiguration_Kubernetes{
				NodeLabelsToSynchronize: []*cpb.ClusterConfiguration_Kubernetes_NodeLabelsToSynchronize{
					{Regexp: `^test\.monogon\.dev/`},
				},
			},
		},
		NewConfig: &cpb.ClusterConfiguration{
			Kubernetes: &cpb.ClusterConfiguration_Kubernetes{},
		},
		UpdateMask: &fieldmaskpb.FieldMask{
			Paths: []string{"kubernetes.node_labels_to_synchronize"},
		},
	})
	if err != nil {
		t.Fatalf("Could not update cluster configuration: %v", err)
	}

	ci, err := mgmt.GetClusterInfo(ctx, &apb.GetClusterInfoRequest{})
	if err != nil {
		t.Fatalf("Could not get cluster info: %v", err)
	}
	// See if the config changed.
	if rules := ci.ClusterConfiguration.Kubernetes.NodeLabelsToSynchronize; len(rules) != 0 {
		t.Fatalf("Wanted 0 label rules in config after reconfiguration, have %d: %v", len(rules), rules)
	}
	// TODO: ensure new rules get applied, but that will require watching the
	// cluster config for changes in the labelmaker.
}

// TestE2EKubernetes exercises the Kubernetes functionality of Metropolis.
//
// The tests are performed against an in-memory cluster.
func TestE2EKubernetes(t *testing.T) {
	// Set a global timeout to make sure this terminates.
	ctx, cancel := context.WithTimeout(context.Background(), globalTestTimeout)
	defer cancel()

	df, err := os.ReadFile(xTestImagesManifestPath)
	if err != nil {
		t.Fatalf("Reading registry manifest failed: %v", err)
	}
	lr, err := localregistry.FromBazelManifest(df)
	if err != nil {
		t.Fatalf("Creating test image registry failed: %v", err)
	}

	// Launch cluster.
	clusterOptions := mlaunch.ClusterOptions{
		NumNodes:      2,
		LocalRegistry: lr,
		InitialClusterConfiguration: &cpb.ClusterConfiguration{
			ClusterDomain:         "cluster.test",
			TpmMode:               cpb.ClusterConfiguration_TPM_MODE_DISABLED,
			StorageSecurityPolicy: cpb.ClusterConfiguration_STORAGE_SECURITY_POLICY_NEEDS_INSECURE,
		},
	}
	cluster, err := mlaunch.LaunchCluster(ctx, clusterOptions)
	if err != nil {
		t.Fatalf("LaunchCluster failed: %v", err)
	}
	defer func() {
		err := cluster.Close()
		if err != nil {
			t.Fatalf("cluster Close failed: %v", err)
		}
	}()

	clientSet, err := cluster.GetKubeClientSet()
	if err != nil {
		t.Fatal(err)
	}
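	// The remaining steps are expressed as eventually-consistent checks: each
	// one is retried until it succeeds, returns an error wrapped in
	// util.Permanent, or its timeout expires.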
	util.TestEventual(t, "Add KubernetesWorker roles", ctx, smallTestTimeout, func(ctx context.Context) error {
		// Make everything but the first node into KubernetesWorkers.
		for i := 1; i < clusterOptions.NumNodes; i++ {
			err := cluster.MakeKubernetesWorker(ctx, cluster.NodeIDs[i])
			if err != nil {
				return util.Permanent(fmt.Errorf("MakeKubernetesWorker: %w", err))
			}
		}
		return nil
	})
	util.TestEventual(t, "Node is registered and ready", ctx, largeTestTimeout, func(ctx context.Context) error {
		nodes, err := clientSet.CoreV1().Nodes().List(ctx, metav1.ListOptions{})
		if err != nil {
			return err
		}
		if len(nodes.Items) < 1 {
			return errors.New("node not yet registered")
		}
		node := nodes.Items[0]
		for _, cond := range node.Status.Conditions {
			if cond.Type != corev1.NodeReady {
				continue
			}
			if cond.Status != corev1.ConditionTrue {
				return fmt.Errorf("node not ready: %v", cond.Message)
			}
		}
		return nil
	})
	util.TestEventual(t, "Simple deployment", ctx, largeTestTimeout, func(ctx context.Context) error {
		_, err := clientSet.AppsV1().Deployments("default").Create(ctx, makeTestDeploymentSpec("test-deploy-1"), metav1.CreateOptions{})
		return err
	})
	util.TestEventual(t, "Simple deployment is running", ctx, largeTestTimeout, func(ctx context.Context) error {
		res, err := clientSet.CoreV1().Pods("default").List(ctx, metav1.ListOptions{LabelSelector: "name=test-deploy-1"})
		if err != nil {
			return err
		}
		if len(res.Items) == 0 {
			return errors.New("pod didn't get created")
		}
		pod := res.Items[0]
		if podv1.IsPodAvailable(&pod, 1, metav1.NewTime(time.Now())) {
			return nil
		}
		events, err := clientSet.CoreV1().Events("default").List(ctx, metav1.ListOptions{FieldSelector: fmt.Sprintf("involvedObject.name=%s,involvedObject.namespace=default", pod.Name)})
		if err != nil || len(events.Items) == 0 {
			return fmt.Errorf("pod is not ready: %v", pod.Status.Phase)
		} else {
			return fmt.Errorf("pod is not ready: %v", events.Items[0].Message)
		}
	})
	util.TestEventual(t, "Simple deployment with gvisor", ctx, largeTestTimeout, func(ctx context.Context) error {
		deployment := makeTestDeploymentSpec("test-deploy-2")
		gvisorStr := "gvisor"
		deployment.Spec.Template.Spec.RuntimeClassName = &gvisorStr
		_, err := clientSet.AppsV1().Deployments("default").Create(ctx, deployment, metav1.CreateOptions{})
		return err
	})
	util.TestEventual(t, "Simple deployment is running on gvisor", ctx, largeTestTimeout, func(ctx context.Context) error {
		res, err := clientSet.CoreV1().Pods("default").List(ctx, metav1.ListOptions{LabelSelector: "name=test-deploy-2"})
		if err != nil {
			return err
		}
		if len(res.Items) == 0 {
			return errors.New("pod didn't get created")
		}
		pod := res.Items[0]
		if podv1.IsPodAvailable(&pod, 1, metav1.NewTime(time.Now())) {
			return nil
		}
		events, err := clientSet.CoreV1().Events("default").List(ctx, metav1.ListOptions{FieldSelector: fmt.Sprintf("involvedObject.name=%s,involvedObject.namespace=default", pod.Name)})
		if err != nil || len(events.Items) == 0 {
			return fmt.Errorf("pod is not ready: %v", pod.Status.Phase)
		} else {
			var errorMsg strings.Builder
			for _, msg := range events.Items {
				errorMsg.WriteString(" | ")
				errorMsg.WriteString(msg.Message)
			}
			return fmt.Errorf("pod is not ready: %s", errorMsg.String())
		}
	})
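	// Run the StatefulSet self-test once per runtime class so both the runc
	// and gvisor code paths are exercised.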
	for _, runtimeClass := range []string{"runc", "gvisor"} {
		statefulSetName := fmt.Sprintf("test-statefulset-%s", runtimeClass)
		util.TestEventual(t, fmt.Sprintf("StatefulSet with %s tests", runtimeClass), ctx, smallTestTimeout, func(ctx context.Context) error {
			_, err := clientSet.AppsV1().StatefulSets("default").Create(ctx, makeTestStatefulSet(statefulSetName, runtimeClass), metav1.CreateOptions{})
			return err
		})
		util.TestEventual(t, fmt.Sprintf("StatefulSet with %s tests successful", runtimeClass), ctx, smallTestTimeout, func(ctx context.Context) error {
			res, err := clientSet.CoreV1().Pods("default").List(ctx, metav1.ListOptions{LabelSelector: fmt.Sprintf("name=%s", statefulSetName)})
			if err != nil {
				return err
			}
			if len(res.Items) == 0 {
				return errors.New("pod didn't get created")
			}
			pod := res.Items[0]
			lines, err := getPodLogLines(ctx, clientSet, pod.Name, 50)
			if err != nil {
				return fmt.Errorf("could not get logs: %w", err)
			}
			if len(lines) > 0 {
				switch lines[len(lines)-1] {
				case "[TESTS-PASSED]":
					return nil
				case "[TESTS-FAILED]":
					return util.Permanent(fmt.Errorf("tests failed, log:\n %s", strings.Join(lines, "\n ")))
				}
			}
			return fmt.Errorf("pod is not ready: %v, log:\n %s", pod.Status.Phase, strings.Join(lines, "\n "))
		})
	}
	util.TestEventual(t, "In-cluster self-test job", ctx, smallTestTimeout, func(ctx context.Context) error {
		_, err := clientSet.BatchV1().Jobs("default").Create(ctx, makeSelftestSpec("selftest"), metav1.CreateOptions{})
		return err
	})
	util.TestEventual(t, "In-cluster self-test job passed", ctx, smallTestTimeout, func(ctx context.Context) error {
		res, err := clientSet.BatchV1().Jobs("default").Get(ctx, "selftest", metav1.GetOptions{})
		if err != nil {
			return err
		}
		if res.Status.Failed > 0 {
			pods, err := clientSet.CoreV1().Pods("default").List(ctx, metav1.ListOptions{
				LabelSelector: "job-name=selftest",
			})
			if err != nil {
				return util.Permanent(fmt.Errorf("job failed but failed to find pod: %w", err))
			}
			if len(pods.Items) < 1 {
				return fmt.Errorf("job failed but pod does not exist")
			}
			lines, err := getPodLogLines(ctx, clientSet, pods.Items[0].Name, 1)
			if err != nil {
				return fmt.Errorf("job failed but could not get logs: %w", err)
			}
			if len(lines) > 0 {
				return util.Permanent(fmt.Errorf("job failed, last log line: %s", lines[0]))
			}
			return util.Permanent(fmt.Errorf("job failed, empty log"))
		}
		if res.Status.Succeeded > 0 {
			return nil
		}
		return fmt.Errorf("job still running")
	})
	util.TestEventual(t, "Start NodePort test setup", ctx, smallTestTimeout, func(ctx context.Context) error {
		_, err := clientSet.AppsV1().Deployments("default").Create(ctx, makeHTTPServerDeploymentSpec("nodeport-server"), metav1.CreateOptions{})
		if err != nil && !kerrors.IsAlreadyExists(err) {
			return err
		}
		_, err = clientSet.CoreV1().Services("default").Create(ctx, makeHTTPServerNodePortService("nodeport-server"), metav1.CreateOptions{})
		if err != nil && !kerrors.IsAlreadyExists(err) {
			return err
		}
		return nil
	})
	util.TestEventual(t, "NodePort accessible from all nodes", ctx, smallTestTimeout, func(ctx context.Context) error {
		nodes, err := clientSet.CoreV1().Nodes().List(ctx, metav1.ListOptions{})
		if err != nil {
			return err
		}
		// Use a new client for each attempt.
		hc := http.Client{
			Timeout: 2 * time.Second,
			Transport: &http.Transport{
				Dial: cluster.SOCKSDialer.Dial,
			},
		}
		for _, n := range nodes.Items {
			var addr string
			for _, a := range n.Status.Addresses {
				if a.Type == corev1.NodeInternalIP {
					addr = a.Address
				}
			}
			u := url.URL{Scheme: "http", Host: addr, Path: "/"}
			res, err := hc.Get(u.String())
			if err != nil {
				return fmt.Errorf("failed getting from node %q: %w", n.Name, err)
			}
			if res.StatusCode != http.StatusOK {
				return fmt.Errorf("getting from node %q: HTTP %d", n.Name, res.StatusCode)
			}
			t.Logf("Got response from %q", n.Name)
		}
		return nil
	})
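	// The node metrics endpoint is served over TLS with cluster credentials,
	// so the client below presents the cluster owner certificate, trusts the
	// cluster CA, and dials the node through the test harness.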
	util.TestEventual(t, "containerd metrics retrieved", ctx, smallTestTimeout, func(ctx context.Context) error {
		pool := x509.NewCertPool()
		pool.AddCert(cluster.CACertificate)
		cl := http.Client{
			Transport: &http.Transport{
				TLSClientConfig: &tls.Config{
					Certificates: []tls.Certificate{cluster.Owner},
					RootCAs:      pool,
				},
				DialContext: func(ctx context.Context, _, addr string) (net.Conn, error) {
					return cluster.DialNode(ctx, addr)
				},
			},
		}
		u := url.URL{
			Scheme: "https",
			Host:   net.JoinHostPort(cluster.NodeIDs[1], common.MetricsPort.PortString()),
			Path:   "/metrics/containerd",
		}
		res, err := cl.Get(u.String())
		if err != nil {
			return err
		}
		defer res.Body.Close()
		if res.StatusCode != 200 {
			return fmt.Errorf("status code %d", res.StatusCode)
		}

		body, err := io.ReadAll(res.Body)
		if err != nil {
			return err
		}
		needle := "containerd_build_info_total"
		if !strings.Contains(string(body), needle) {
			return util.Permanent(fmt.Errorf("could not find %q in returned response", needle))
		}
		return nil
	})
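	// The KVM/QEMU smoke test needs nested virtualization on the host, so it
	// only runs when the test environment sets HAVE_NESTED_KVM.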
	if os.Getenv("HAVE_NESTED_KVM") != "" {
		util.TestEventual(t, "Pod for KVM/QEMU smoke test", ctx, smallTestTimeout, func(ctx context.Context) error {
			runcRuntimeClass := "runc"
			_, err := clientSet.CoreV1().Pods("default").Create(ctx, &corev1.Pod{
				ObjectMeta: metav1.ObjectMeta{
					Name: "vm-smoketest",
				},
				Spec: corev1.PodSpec{
					Containers: []corev1.Container{{
						Name:            "vm-smoketest",
						ImagePullPolicy: corev1.PullNever,
						Image:           "test.monogon.internal/metropolis/vm/smoketest:smoketest_container",
						Resources: corev1.ResourceRequirements{
							Limits: corev1.ResourceList{
								"devices.monogon.dev/kvm": *resource.NewQuantity(1, ""),
							},
						},
					}},
					RuntimeClassName: &runcRuntimeClass,
					RestartPolicy:    corev1.RestartPolicyNever,
				},
			}, metav1.CreateOptions{})
			return err
		})
		util.TestEventual(t, "KVM/QEMU smoke test completion", ctx, smallTestTimeout, func(ctx context.Context) error {
			pod, err := clientSet.CoreV1().Pods("default").Get(ctx, "vm-smoketest", metav1.GetOptions{})
			if err != nil {
				return fmt.Errorf("failed to get pod: %w", err)
			}
			if pod.Status.Phase == corev1.PodSucceeded {
				return nil
			}
			events, err := clientSet.CoreV1().Events("default").List(ctx, metav1.ListOptions{FieldSelector: fmt.Sprintf("involvedObject.name=%s,involvedObject.namespace=default", pod.Name)})
			if err != nil || len(events.Items) == 0 {
				return fmt.Errorf("pod is not ready: %v", pod.Status.Phase)
			} else {
				return fmt.Errorf("pod is not ready: %v", events.Items[len(events.Items)-1].Message)
			}
		})
	}
}