package kubernetes

import (
	"context"
	"crypto/tls"
	"crypto/x509"
	"errors"
	"fmt"
	"io"
	"net"
	"net/http"
	_ "net/http/pprof"
	"net/url"
	"os"
	"strings"
	"testing"
	"time"

	"github.com/bazelbuild/rules_go/go/runfiles"
	"google.golang.org/protobuf/types/known/fieldmaskpb"
	corev1 "k8s.io/api/core/v1"
	kerrors "k8s.io/apimachinery/pkg/api/errors"
	"k8s.io/apimachinery/pkg/api/resource"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	podv1 "k8s.io/kubernetes/pkg/api/v1/pod"

	common "source.monogon.dev/metropolis/node"
	apb "source.monogon.dev/metropolis/proto/api"
	cpb "source.monogon.dev/metropolis/proto/common"
	mlaunch "source.monogon.dev/metropolis/test/launch"
	"source.monogon.dev/metropolis/test/localregistry"
	"source.monogon.dev/metropolis/test/util"
)

var (
	// These are filled in by Bazel at link time with the canonical paths of
	// their corresponding files. The init function below resolves them to
	// real paths using the rules_go runfiles package.
	xTestImagesManifestPath string
)

func init() {
	var err error
	for _, path := range []*string{
		&xTestImagesManifestPath,
	} {
		*path, err = runfiles.Rlocation(*path)
		if err != nil {
			panic(err)
		}
	}
}

const (
	// Timeout for the global test context.
	//
	// Bazel would eventually time out the test after 900s ("large") if, for
	// some reason, the context cancellation fails to abort it.
	globalTestTimeout = 600 * time.Second

	// Timeouts for individual end-to-end tests of different sizes.
	smallTestTimeout = 60 * time.Second
	largeTestTimeout = 120 * time.Second
)

// TestE2EKubernetesLabels verifies that Kubernetes node labels are being updated
// when the cluster state changes.
func TestE2EKubernetesLabels(t *testing.T) {
	ctx, cancel := context.WithTimeout(context.Background(), globalTestTimeout)
	defer cancel()

	clusterOptions := mlaunch.ClusterOptions{
		NumNodes: 2,
		InitialClusterConfiguration: &cpb.ClusterConfiguration{
			TpmMode:               cpb.ClusterConfiguration_TPM_MODE_DISABLED,
			StorageSecurityPolicy: cpb.ClusterConfiguration_STORAGE_SECURITY_POLICY_NEEDS_INSECURE,
			KubernetesConfig: &cpb.ClusterConfiguration_KubernetesConfig{
				NodeLabelsToSynchronize: []*cpb.ClusterConfiguration_KubernetesConfig_NodeLabelsToSynchronize{
					{Regexp: `^test\.monogon\.dev/`},
				},
			},
		},
	}
	cluster, err := mlaunch.LaunchCluster(ctx, clusterOptions)
	if err != nil {
		t.Fatalf("LaunchCluster failed: %v", err)
	}
	defer func() {
		err := cluster.Close()
		if err != nil {
			t.Fatalf("cluster Close failed: %v", err)
		}
	}()

	con, err := cluster.CuratorClient()
	if err != nil {
		t.Fatalf("Could not get curator client: %v", err)
	}
	mgmt := apb.NewManagementClient(con)
	clientSet, err := cluster.GetKubeClientSet()
	if err != nil {
		t.Fatal(err)
	}

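	// getLabelsForNode fetches the given node from the Kubernetes API and
	// returns only the labels relevant to this test: node role labels and
	// labels under test.monogon.dev/. It returns nil if the node does not
	// exist yet.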
	getLabelsForNode := func(nid string) common.Labels {
		node, err := clientSet.CoreV1().Nodes().Get(ctx, nid, metav1.GetOptions{})
		if kerrors.IsNotFound(err) {
			return nil
		}
		if err != nil {
			t.Fatalf("Could not get node %s: %v", nid, err)
			return nil
		}
		return common.Labels(node.Labels).Filter(func(k, v string) bool {
			if strings.HasPrefix(k, "node-role.kubernetes.io/") {
				return true
			}
			if strings.HasPrefix(k, "test.monogon.dev/") {
				return true
			}
			return false
		})
	}

	// Nodes should have no labels at first.
	for _, nid := range cluster.NodeIDs {
		if labels := getLabelsForNode(nid); !labels.Equals(nil) {
			t.Errorf("Node %s should have no labels, has %s", nid, labels)
		}
	}
	// Nominate both nodes to be Kubernetes workers.
	for _, nid := range cluster.NodeIDs {
		yes := true
		_, err := mgmt.UpdateNodeRoles(ctx, &apb.UpdateNodeRolesRequest{
			Node: &apb.UpdateNodeRolesRequest_Id{
				Id: nid,
			},
			KubernetesWorker: &yes,
		})
		if err != nil {
			t.Fatalf("Could not make %s a KubernetesWorker: %v", nid, err)
		}
	}

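	// Both nodes should now be labeled as KubernetesWorker; the first node
	// additionally carries the control plane role labels
	// (KubernetesController, ConsensusMember).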
	util.MustTestEventual(t, "Labels added", ctx, time.Second*5, func(ctx context.Context) error {
		// Nodes should have role labels now.
		for _, nid := range cluster.NodeIDs {
			want := common.Labels{
				"node-role.kubernetes.io/KubernetesWorker": "",
			}
			if nid == cluster.NodeIDs[0] {
				want["node-role.kubernetes.io/KubernetesController"] = ""
				want["node-role.kubernetes.io/ConsensusMember"] = ""
			}
			if labels := getLabelsForNode(nid); !want.Equals(labels) {
				return fmt.Errorf("node %s should have labels %s, has %s", nid, want, labels)
			}
		}
		return nil
	})

	// Remove KubernetesWorker from the first node again. It will stay in k8s
	// (arguably, this is a bug) but its role label should be removed.
	no := false
	_, err = mgmt.UpdateNodeRoles(ctx, &apb.UpdateNodeRolesRequest{
		Node: &apb.UpdateNodeRolesRequest_Id{
			Id: cluster.NodeIDs[0],
		},
		KubernetesWorker: &no,
	})
	if err != nil {
		t.Fatalf("Could not remove KubernetesWorker from %s: %v", cluster.NodeIDs[0], err)
	}

	util.MustTestEventual(t, "Labels removed", ctx, time.Second*5, func(ctx context.Context) error {
		for _, nid := range cluster.NodeIDs {
			want := make(common.Labels)
			if nid == cluster.NodeIDs[0] {
				want["node-role.kubernetes.io/KubernetesController"] = ""
				want["node-role.kubernetes.io/ConsensusMember"] = ""
			} else {
				want["node-role.kubernetes.io/KubernetesWorker"] = ""
			}
			if labels := getLabelsForNode(nid); !want.Equals(labels) {
				return fmt.Errorf("node %s should have labels %s, has %s", nid, want, labels)
			}
		}
		return nil
	})

	// Add a Metropolis node label, ensure it gets reflected on the Kubernetes node.
	_, err = mgmt.UpdateNodeLabels(ctx, &apb.UpdateNodeLabelsRequest{
		Node: &apb.UpdateNodeLabelsRequest_Id{
			Id: cluster.NodeIDs[1],
		},
		Upsert: []*apb.UpdateNodeLabelsRequest_Pair{
			{Key: "test.monogon.dev/foo", Value: "bar"},
		},
	})
	if err != nil {
		t.Fatalf("Could not add label to node: %v", err)
	}

	util.MustTestEventual(t, "Metropolis labels added", ctx, time.Second*5, func(ctx context.Context) error {
		want := common.Labels{
			"node-role.kubernetes.io/KubernetesWorker": "",
			"test.monogon.dev/foo":                     "bar",
		}
		if labels := getLabelsForNode(cluster.NodeIDs[1]); !want.Equals(labels) {
			return fmt.Errorf("node %s should have labels %s, has %s", cluster.NodeIDs[1], want, labels)
		}
		return nil
	})

	// Reconfigure node label rules.
	_, err = mgmt.ConfigureCluster(ctx, &apb.ConfigureClusterRequest{
		BaseConfig: &cpb.ClusterConfiguration{
			KubernetesConfig: &cpb.ClusterConfiguration_KubernetesConfig{
				NodeLabelsToSynchronize: []*cpb.ClusterConfiguration_KubernetesConfig_NodeLabelsToSynchronize{
					{Regexp: `^test\.monogon\.dev/`},
				},
			},
		},
		NewConfig: &cpb.ClusterConfiguration{
			KubernetesConfig: &cpb.ClusterConfiguration_KubernetesConfig{},
		},
		UpdateMask: &fieldmaskpb.FieldMask{
			Paths: []string{"kubernetes_config.node_labels_to_synchronize"},
		},
	})
	if err != nil {
		t.Fatalf("Could not update cluster configuration: %v", err)
	}

	ci, err := mgmt.GetClusterInfo(ctx, &apb.GetClusterInfoRequest{})
	if err != nil {
		t.Fatalf("Could not get cluster info: %v", err)
	}
	// See if the config changed.
	if rules := ci.ClusterConfiguration.KubernetesConfig.NodeLabelsToSynchronize; len(rules) != 0 {
		t.Fatalf("Wanted 0 label rules in config after reconfiguration, have %d: %v", len(rules), rules)
	}
	// TODO: ensure new rules get applied, but that will require watching the cluster
	// config for changes in the labelmaker.
}

// TestE2EKubernetes exercises the Kubernetes functionality of Metropolis.
//
// The tests are performed against an in-memory cluster.
func TestE2EKubernetes(t *testing.T) {
	// Set a global timeout to make sure this terminates
	ctx, cancel := context.WithTimeout(context.Background(), globalTestTimeout)
	defer cancel()

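	// Build a local test image registry from the Bazel-provided manifest; the
	// cluster launched below is configured to serve test workload images from
	// it.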
	df, err := os.ReadFile(xTestImagesManifestPath)
	if err != nil {
		t.Fatalf("Reading registry manifest failed: %v", err)
	}
	lr, err := localregistry.FromBazelManifest(df)
	if err != nil {
		t.Fatalf("Creating test image registry failed: %v", err)
	}

	// Launch cluster.
	clusterOptions := mlaunch.ClusterOptions{
		NumNodes:      2,
		LocalRegistry: lr,
		InitialClusterConfiguration: &cpb.ClusterConfiguration{
			TpmMode:               cpb.ClusterConfiguration_TPM_MODE_DISABLED,
			StorageSecurityPolicy: cpb.ClusterConfiguration_STORAGE_SECURITY_POLICY_NEEDS_INSECURE,
		},
	}
	cluster, err := mlaunch.LaunchCluster(ctx, clusterOptions)
	if err != nil {
		t.Fatalf("LaunchCluster failed: %v", err)
	}
	defer func() {
		err := cluster.Close()
		if err != nil {
			t.Fatalf("cluster Close failed: %v", err)
		}
	}()

	clientSet, err := cluster.GetKubeClientSet()
	if err != nil {
		t.Fatal(err)
	}
	util.TestEventual(t, "Add KubernetesWorker roles", ctx, smallTestTimeout, func(ctx context.Context) error {
		// Make everything but the first node into KubernetesWorkers.
		for i := 1; i < clusterOptions.NumNodes; i++ {
			err := cluster.MakeKubernetesWorker(ctx, cluster.NodeIDs[i])
			if err != nil {
				return util.Permanent(fmt.Errorf("MakeKubernetesWorker: %w", err))
			}
		}
		return nil
	})
	util.TestEventual(t, "Node is registered and ready", ctx, largeTestTimeout, func(ctx context.Context) error {
		nodes, err := clientSet.CoreV1().Nodes().List(ctx, metav1.ListOptions{})
		if err != nil {
			return err
		}
		if len(nodes.Items) < 1 {
			return errors.New("node not yet registered")
		}
		node := nodes.Items[0]
		for _, cond := range node.Status.Conditions {
			if cond.Type != corev1.NodeReady {
				continue
			}
			if cond.Status != corev1.ConditionTrue {
				return fmt.Errorf("node not ready: %v", cond.Message)
			}
		}
		return nil
	})
	util.TestEventual(t, "Simple deployment", ctx, largeTestTimeout, func(ctx context.Context) error {
		_, err := clientSet.AppsV1().Deployments("default").Create(ctx, makeTestDeploymentSpec("test-deploy-1"), metav1.CreateOptions{})
		return err
	})
	util.TestEventual(t, "Simple deployment is running", ctx, largeTestTimeout, func(ctx context.Context) error {
		res, err := clientSet.CoreV1().Pods("default").List(ctx, metav1.ListOptions{LabelSelector: "name=test-deploy-1"})
		if err != nil {
			return err
		}
		if len(res.Items) == 0 {
			return errors.New("pod didn't get created")
		}
		pod := res.Items[0]
		if podv1.IsPodAvailable(&pod, 1, metav1.NewTime(time.Now())) {
			return nil
		}
		events, err := clientSet.CoreV1().Events("default").List(ctx, metav1.ListOptions{FieldSelector: fmt.Sprintf("involvedObject.name=%s,involvedObject.namespace=default", pod.Name)})
		if err != nil || len(events.Items) == 0 {
			return fmt.Errorf("pod is not ready: %v", pod.Status.Phase)
		} else {
			return fmt.Errorf("pod is not ready: %v", events.Items[0].Message)
		}
	})
	util.TestEventual(t, "Simple deployment with gvisor", ctx, largeTestTimeout, func(ctx context.Context) error {
		deployment := makeTestDeploymentSpec("test-deploy-2")
		gvisorStr := "gvisor"
		deployment.Spec.Template.Spec.RuntimeClassName = &gvisorStr
		_, err := clientSet.AppsV1().Deployments("default").Create(ctx, deployment, metav1.CreateOptions{})
		return err
	})
	util.TestEventual(t, "Simple deployment is running on gvisor", ctx, largeTestTimeout, func(ctx context.Context) error {
		res, err := clientSet.CoreV1().Pods("default").List(ctx, metav1.ListOptions{LabelSelector: "name=test-deploy-2"})
		if err != nil {
			return err
		}
		if len(res.Items) == 0 {
			return errors.New("pod didn't get created")
		}
		pod := res.Items[0]
		if podv1.IsPodAvailable(&pod, 1, metav1.NewTime(time.Now())) {
			return nil
		}
		events, err := clientSet.CoreV1().Events("default").List(ctx, metav1.ListOptions{FieldSelector: fmt.Sprintf("involvedObject.name=%s,involvedObject.namespace=default", pod.Name)})
		if err != nil || len(events.Items) == 0 {
			return fmt.Errorf("pod is not ready: %v", pod.Status.Phase)
		} else {
			var errorMsg strings.Builder
			for _, msg := range events.Items {
				errorMsg.WriteString(" | ")
				errorMsg.WriteString(msg.Message)
			}
			return fmt.Errorf("pod is not ready: %s", errorMsg.String())
		}
	})
	util.TestEventual(t, "Simple StatefulSet with PVC", ctx, largeTestTimeout, func(ctx context.Context) error {
		_, err := clientSet.AppsV1().StatefulSets("default").Create(ctx, makeTestStatefulSet("test-statefulset-1", corev1.PersistentVolumeFilesystem), metav1.CreateOptions{})
		return err
	})
	util.TestEventual(t, "Simple StatefulSet with PVC is running", ctx, largeTestTimeout, func(ctx context.Context) error {
		res, err := clientSet.CoreV1().Pods("default").List(ctx, metav1.ListOptions{LabelSelector: "name=test-statefulset-1"})
		if err != nil {
			return err
		}
		if len(res.Items) == 0 {
			return errors.New("pod didn't get created")
		}
		pod := res.Items[0]
		if podv1.IsPodAvailable(&pod, 1, metav1.NewTime(time.Now())) {
			return nil
		}
		events, err := clientSet.CoreV1().Events("default").List(ctx, metav1.ListOptions{FieldSelector: fmt.Sprintf("involvedObject.name=%s,involvedObject.namespace=default", pod.Name)})
		if err != nil || len(events.Items) == 0 {
			return fmt.Errorf("pod is not ready: %v", pod.Status.Phase)
		} else {
			return fmt.Errorf("pod is not ready: %v", events.Items[0].Message)
		}
	})
	util.TestEventual(t, "Simple StatefulSet with Block PVC", ctx, largeTestTimeout, func(ctx context.Context) error {
		_, err := clientSet.AppsV1().StatefulSets("default").Create(ctx, makeTestStatefulSet("test-statefulset-2", corev1.PersistentVolumeBlock), metav1.CreateOptions{})
		return err
	})
	util.TestEventual(t, "Simple StatefulSet with Block PVC is running", ctx, largeTestTimeout, func(ctx context.Context) error {
		res, err := clientSet.CoreV1().Pods("default").List(ctx, metav1.ListOptions{LabelSelector: "name=test-statefulset-2"})
		if err != nil {
			return err
		}
		if len(res.Items) == 0 {
			return errors.New("pod didn't get created")
		}
		pod := res.Items[0]
		if podv1.IsPodAvailable(&pod, 1, metav1.NewTime(time.Now())) {
			return nil
		}
		events, err := clientSet.CoreV1().Events("default").List(ctx, metav1.ListOptions{FieldSelector: fmt.Sprintf("involvedObject.name=%s,involvedObject.namespace=default", pod.Name)})
		if err != nil || len(events.Items) == 0 {
			return fmt.Errorf("pod is not ready: %v", pod.Status.Phase)
		} else {
			return fmt.Errorf("pod is not ready: %v", events.Items[0].Message)
		}
	})
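	// Run the in-cluster self-test job (see makeSelftestSpec). If it fails,
	// the last log line of its pod is surfaced to make debugging easier.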
	util.TestEventual(t, "In-cluster self-test job", ctx, smallTestTimeout, func(ctx context.Context) error {
		_, err := clientSet.BatchV1().Jobs("default").Create(ctx, makeSelftestSpec("selftest"), metav1.CreateOptions{})
		return err
	})
	util.TestEventual(t, "In-cluster self-test job passed", ctx, smallTestTimeout, func(ctx context.Context) error {
		res, err := clientSet.BatchV1().Jobs("default").Get(ctx, "selftest", metav1.GetOptions{})
		if err != nil {
			return err
		}
		if res.Status.Failed > 0 {
			pods, err := clientSet.CoreV1().Pods("default").List(ctx, metav1.ListOptions{
				LabelSelector: "job-name=selftest",
			})
			if err != nil {
				return util.Permanent(fmt.Errorf("job failed but failed to find pod: %w", err))
			}
			if len(pods.Items) < 1 {
				return fmt.Errorf("job failed but pod does not exist")
			}
			lines, err := getPodLogLines(ctx, clientSet, pods.Items[0].Name, 1)
			if err != nil {
				return fmt.Errorf("job failed but could not get logs: %w", err)
			}
			if len(lines) > 0 {
				return util.Permanent(fmt.Errorf("job failed, last log line: %s", lines[0]))
			}
			return util.Permanent(fmt.Errorf("job failed, empty log"))
		}
		if res.Status.Succeeded > 0 {
			return nil
		}
		return fmt.Errorf("job still running")
	})
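	// Expose an HTTP server Deployment through a NodePort Service, then
	// verify that the port is reachable on every node's internal IP, dialing
	// through the test SOCKS proxy into the cluster network.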
	util.TestEventual(t, "Start NodePort test setup", ctx, smallTestTimeout, func(ctx context.Context) error {
		_, err := clientSet.AppsV1().Deployments("default").Create(ctx, makeHTTPServerDeploymentSpec("nodeport-server"), metav1.CreateOptions{})
		if err != nil && !kerrors.IsAlreadyExists(err) {
			return err
		}
		_, err = clientSet.CoreV1().Services("default").Create(ctx, makeHTTPServerNodePortService("nodeport-server"), metav1.CreateOptions{})
		if err != nil && !kerrors.IsAlreadyExists(err) {
			return err
		}
		return nil
	})
	util.TestEventual(t, "NodePort accessible from all nodes", ctx, smallTestTimeout, func(ctx context.Context) error {
		nodes, err := clientSet.CoreV1().Nodes().List(ctx, metav1.ListOptions{})
		if err != nil {
			return err
		}
		// Use a new client for each attempt
		hc := http.Client{
			Timeout: 2 * time.Second,
			Transport: &http.Transport{
				Dial: cluster.SOCKSDialer.Dial,
			},
		}
		for _, n := range nodes.Items {
			var addr string
			for _, a := range n.Status.Addresses {
				if a.Type == corev1.NodeInternalIP {
					addr = a.Address
				}
			}
			u := url.URL{Scheme: "http", Host: addr, Path: "/"}
			res, err := hc.Get(u.String())
			if err != nil {
				return fmt.Errorf("failed getting from node %q: %w", n.Name, err)
			}
			if res.StatusCode != http.StatusOK {
				return fmt.Errorf("getting from node %q: HTTP %d", n.Name, res.StatusCode)
			}
			t.Logf("Got response from %q", n.Name)
		}
		return nil
	})
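	// Scrape the node metrics endpoint over TLS, authenticating with the
	// cluster owner credentials against the cluster CA, and check that
	// containerd metrics are exported under /metrics/containerd.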
	util.TestEventual(t, "containerd metrics retrieved", ctx, smallTestTimeout, func(ctx context.Context) error {
		pool := x509.NewCertPool()
		pool.AddCert(cluster.CACertificate)
		cl := http.Client{
			Transport: &http.Transport{
				TLSClientConfig: &tls.Config{
					Certificates: []tls.Certificate{cluster.Owner},
					RootCAs:      pool,
				},
				DialContext: func(ctx context.Context, _, addr string) (net.Conn, error) {
					return cluster.DialNode(ctx, addr)
				},
			},
		}
		u := url.URL{
			Scheme: "https",
			Host:   net.JoinHostPort(cluster.NodeIDs[1], common.MetricsPort.PortString()),
			Path:   "/metrics/containerd",
		}
		res, err := cl.Get(u.String())
		if err != nil {
			return err
		}
		defer res.Body.Close()
		if res.StatusCode != 200 {
			return fmt.Errorf("status code %d", res.StatusCode)
		}

		body, err := io.ReadAll(res.Body)
		if err != nil {
			return err
		}
		needle := "containerd_build_info_total"
		if !strings.Contains(string(body), needle) {
			return util.Permanent(fmt.Errorf("could not find %q in returned response", needle))
		}
		return nil
	})
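	// The KVM/QEMU smoke test requires nested virtualization, so it only runs
	// when the environment signals its availability via HAVE_NESTED_KVM.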
	if os.Getenv("HAVE_NESTED_KVM") != "" {
		util.TestEventual(t, "Pod for KVM/QEMU smoke test", ctx, smallTestTimeout, func(ctx context.Context) error {
			runcRuntimeClass := "runc"
			_, err := clientSet.CoreV1().Pods("default").Create(ctx, &corev1.Pod{
				ObjectMeta: metav1.ObjectMeta{
					Name: "vm-smoketest",
				},
				Spec: corev1.PodSpec{
					Containers: []corev1.Container{{
						Name:            "vm-smoketest",
						ImagePullPolicy: corev1.PullNever,
						Image:           "test.monogon.internal/metropolis/vm/smoketest:smoketest_container",
						Resources: corev1.ResourceRequirements{
							Limits: corev1.ResourceList{
								"devices.monogon.dev/kvm": *resource.NewQuantity(1, ""),
							},
						},
					}},
					RuntimeClassName: &runcRuntimeClass,
					RestartPolicy:    corev1.RestartPolicyNever,
				},
			}, metav1.CreateOptions{})
			return err
		})
		util.TestEventual(t, "KVM/QEMU smoke test completion", ctx, smallTestTimeout, func(ctx context.Context) error {
			pod, err := clientSet.CoreV1().Pods("default").Get(ctx, "vm-smoketest", metav1.GetOptions{})
			if err != nil {
				return fmt.Errorf("failed to get pod: %w", err)
			}
			if pod.Status.Phase == corev1.PodSucceeded {
				return nil
			}
			events, err := clientSet.CoreV1().Events("default").List(ctx, metav1.ListOptions{FieldSelector: fmt.Sprintf("involvedObject.name=%s,involvedObject.namespace=default", pod.Name)})
			if err != nil || len(events.Items) == 0 {
				return fmt.Errorf("pod is not ready: %v", pod.Status.Phase)
			} else {
				return fmt.Errorf("pod is not ready: %v", events.Items[len(events.Items)-1].Message)
			}
		})
	}
}